xref: /petsc/src/mat/impls/baij/seq/baij.c (revision a0452e342fa39dddcd02a770babb2f6ac8b99ab3)
1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "petscsys.h"                     /*I "petscmat.h" I*/
11 
12 /*  UGLY, ugly, ugly
13    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
14    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
15    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
16    converts the entries into single precision and then calls ..._MatScalar() to put them
17    into the single precision data structures.
18 */
19 #if defined(PETSC_USE_MAT_SINGLE)
20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
21 #else
22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
23 #endif
24 #if defined(PETSC_HAVE_DSCPACK)
25 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat);
26 #endif
27 
28 #define CHUNKSIZE  10
29 
30 /*
31      Checks for missing diagonals
32 */
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
35 int MatMissingDiagonal_SeqBAIJ(Mat A)
36 {
37   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
38   int         *diag,*jj = a->j,i,ierr;
39 
40   PetscFunctionBegin;
41   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
42   diag = a->diag;
43   for (i=0; i<a->mbs; i++) {
44     if (jj[diag[i]] != i) {
45       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
46     }
47   }
48   PetscFunctionReturn(0);
49 }
50 
51 #undef __FUNCT__
52 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
53 int MatMarkDiagonal_SeqBAIJ(Mat A)
54 {
55   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
56   int         i,j,*diag,m = a->mbs,ierr;
57 
58   PetscFunctionBegin;
59   if (a->diag) PetscFunctionReturn(0);
60 
61   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
62   PetscLogObjectMemory(A,(m+1)*sizeof(int));
63   for (i=0; i<m; i++) {
64     diag[i] = a->i[i+1];
65     for (j=a->i[i]; j<a->i[i+1]; j++) {
66       if (a->j[j] == i) {
67         diag[i] = j;
68         break;
69       }
70     }
71   }
72   a->diag = diag;
73   PetscFunctionReturn(0);
74 }
75 
76 
77 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
78 
79 #undef __FUNCT__
80 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
81 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
82 {
83   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
84   int         ierr,n = a->mbs,i;
85 
86   PetscFunctionBegin;
87   *nn = n;
88   if (!ia) PetscFunctionReturn(0);
89   if (symmetric) {
90     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
91   } else if (oshift == 1) {
92     /* temporarily add 1 to i and j indices */
93     int nz = a->i[n];
94     for (i=0; i<nz; i++) a->j[i]++;
95     for (i=0; i<n+1; i++) a->i[i]++;
96     *ia = a->i; *ja = a->j;
97   } else {
98     *ia = a->i; *ja = a->j;
99   }
100 
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNCT__
105 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
106 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
107 {
108   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
109   int         i,n = a->mbs,ierr;
110 
111   PetscFunctionBegin;
112   if (!ia) PetscFunctionReturn(0);
113   if (symmetric) {
114     ierr = PetscFree(*ia);CHKERRQ(ierr);
115     ierr = PetscFree(*ja);CHKERRQ(ierr);
116   } else if (oshift == 1) {
117     int nz = a->i[n]-1;
118     for (i=0; i<nz; i++) a->j[i]--;
119     for (i=0; i<n+1; i++) a->i[i]--;
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
126 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
127 {
128   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
129 
130   PetscFunctionBegin;
131   *bs = baij->bs;
132   PetscFunctionReturn(0);
133 }
134 
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "MatDestroy_SeqBAIJ"
138 int MatDestroy_SeqBAIJ(Mat A)
139 {
140   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
141   int         ierr;
142 
143   PetscFunctionBegin;
144 #if defined(PETSC_USE_LOG)
145   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
146 #endif
147   ierr = PetscFree(a->a);CHKERRQ(ierr);
148   if (!a->singlemalloc) {
149     ierr = PetscFree(a->i);CHKERRQ(ierr);
150     ierr = PetscFree(a->j);CHKERRQ(ierr);
151   }
152   if (a->row) {
153     ierr = ISDestroy(a->row);CHKERRQ(ierr);
154   }
155   if (a->col) {
156     ierr = ISDestroy(a->col);CHKERRQ(ierr);
157   }
158   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
159   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
160   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
161   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
162   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
163   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
164   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
165 #if defined(PETSC_USE_MAT_SINGLE)
166   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
167 #endif
168   ierr = PetscFree(a);CHKERRQ(ierr);
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatSetOption_SeqBAIJ"
174 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
175 {
176   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
177 
178   PetscFunctionBegin;
179   switch (op) {
180   case MAT_ROW_ORIENTED:
181     a->roworiented    = PETSC_TRUE;
182     break;
183   case MAT_COLUMN_ORIENTED:
184     a->roworiented    = PETSC_FALSE;
185     break;
186   case MAT_COLUMNS_SORTED:
187     a->sorted         = PETSC_TRUE;
188     break;
189   case MAT_COLUMNS_UNSORTED:
190     a->sorted         = PETSC_FALSE;
191     break;
192   case MAT_KEEP_ZEROED_ROWS:
193     a->keepzeroedrows = PETSC_TRUE;
194     break;
195   case MAT_NO_NEW_NONZERO_LOCATIONS:
196     a->nonew          = 1;
197     break;
198   case MAT_NEW_NONZERO_LOCATION_ERR:
199     a->nonew          = -1;
200     break;
201   case MAT_NEW_NONZERO_ALLOCATION_ERR:
202     a->nonew          = -2;
203     break;
204   case MAT_YES_NEW_NONZERO_LOCATIONS:
205     a->nonew          = 0;
206     break;
207   case MAT_ROWS_SORTED:
208   case MAT_ROWS_UNSORTED:
209   case MAT_YES_NEW_DIAGONALS:
210   case MAT_IGNORE_OFF_PROC_ENTRIES:
211   case MAT_USE_HASH_TABLE:
212     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
213     break;
214   case MAT_NO_NEW_DIAGONALS:
215     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
216   case MAT_USE_SINGLE_PRECISION_SOLVES:
217     if (a->bs==4) {
218       PetscTruth sse_enabled_local,sse_enabled_global;
219       int        ierr;
220 
221       sse_enabled_local  = PETSC_FALSE;
222       sse_enabled_global = PETSC_FALSE;
223 
224       ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr);
225 #if defined(PETSC_HAVE_SSE)
226       if (sse_enabled_local) {
227           a->single_precision_solves = PETSC_TRUE;
228           A->ops->solve              = MatSolve_SeqBAIJ_Update;
229           A->ops->solvetranspose     = MatSolveTranspose_SeqBAIJ_Update;
230           PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n");
231           break;
232       } else {
233         PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
234       }
235 #else
236       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
237 #endif
238     }
239     break;
240   default:
241     SETERRQ(PETSC_ERR_SUP,"unknown option");
242   }
243   PetscFunctionReturn(0);
244 }
245 
246 #undef __FUNCT__
247 #define __FUNCT__ "MatGetRow_SeqBAIJ"
248 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
249 {
250   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
251   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
252   MatScalar    *aa,*aa_i;
253   PetscScalar  *v_i;
254 
255   PetscFunctionBegin;
256   bs  = a->bs;
257   ai  = a->i;
258   aj  = a->j;
259   aa  = a->a;
260   bs2 = a->bs2;
261 
262   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
263 
264   bn  = row/bs;   /* Block number */
265   bp  = row % bs; /* Block Position */
266   M   = ai[bn+1] - ai[bn];
267   *nz = bs*M;
268 
269   if (v) {
270     *v = 0;
271     if (*nz) {
272       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
273       for (i=0; i<M; i++) { /* for each block in the block row */
274         v_i  = *v + i*bs;
275         aa_i = aa + bs2*(ai[bn] + i);
276         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
277       }
278     }
279   }
280 
281   if (idx) {
282     *idx = 0;
283     if (*nz) {
284       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
285       for (i=0; i<M; i++) { /* for each block in the block row */
286         idx_i = *idx + i*bs;
287         itmp  = bs*aj[ai[bn] + i];
288         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
289       }
290     }
291   }
292   PetscFunctionReturn(0);
293 }
294 
295 #undef __FUNCT__
296 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
297 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
298 {
299   int ierr;
300 
301   PetscFunctionBegin;
302   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
303   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
304   PetscFunctionReturn(0);
305 }
306 
307 #undef __FUNCT__
308 #define __FUNCT__ "MatTranspose_SeqBAIJ"
309 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
310 {
311   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
312   Mat         C;
313   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
314   int         *rows,*cols,bs2=a->bs2;
315   PetscScalar *array;
316 
317   PetscFunctionBegin;
318   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
319   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
320   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
321 
322 #if defined(PETSC_USE_MAT_SINGLE)
323   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
324   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
325 #else
326   array = a->a;
327 #endif
328 
329   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
330   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
331   ierr = PetscFree(col);CHKERRQ(ierr);
332   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
333   cols = rows + bs;
334   for (i=0; i<mbs; i++) {
335     cols[0] = i*bs;
336     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
337     len = ai[i+1] - ai[i];
338     for (j=0; j<len; j++) {
339       rows[0] = (*aj++)*bs;
340       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
341       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
342       array += bs2;
343     }
344   }
345   ierr = PetscFree(rows);CHKERRQ(ierr);
346 #if defined(PETSC_USE_MAT_SINGLE)
347   ierr = PetscFree(array);
348 #endif
349 
350   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
351   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
352 
353   if (B) {
354     *B = C;
355   } else {
356     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
357   }
358   PetscFunctionReturn(0);
359 }
360 
361 #undef __FUNCT__
362 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
363 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
364 {
365   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
366   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
367   PetscScalar *aa;
368   FILE        *file;
369 
370   PetscFunctionBegin;
371   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
372   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
373   col_lens[0] = MAT_FILE_COOKIE;
374 
375   col_lens[1] = A->m;
376   col_lens[2] = A->n;
377   col_lens[3] = a->nz*bs2;
378 
379   /* store lengths of each row and write (including header) to file */
380   count = 0;
381   for (i=0; i<a->mbs; i++) {
382     for (j=0; j<bs; j++) {
383       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
384     }
385   }
386   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
387   ierr = PetscFree(col_lens);CHKERRQ(ierr);
388 
389   /* store column indices (zero start index) */
390   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
391   count = 0;
392   for (i=0; i<a->mbs; i++) {
393     for (j=0; j<bs; j++) {
394       for (k=a->i[i]; k<a->i[i+1]; k++) {
395         for (l=0; l<bs; l++) {
396           jj[count++] = bs*a->j[k] + l;
397         }
398       }
399     }
400   }
401   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
402   ierr = PetscFree(jj);CHKERRQ(ierr);
403 
404   /* store nonzero values */
405   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
406   count = 0;
407   for (i=0; i<a->mbs; i++) {
408     for (j=0; j<bs; j++) {
409       for (k=a->i[i]; k<a->i[i+1]; k++) {
410         for (l=0; l<bs; l++) {
411           aa[count++] = a->a[bs2*k + l*bs + j];
412         }
413       }
414     }
415   }
416   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
417   ierr = PetscFree(aa);CHKERRQ(ierr);
418 
419   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
420   if (file) {
421     fprintf(file,"-matload_block_size %d\n",a->bs);
422   }
423   PetscFunctionReturn(0);
424 }
425 
426 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer);
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
430 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
431 {
432   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
433   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
434   PetscViewerFormat format;
435 
436   PetscFunctionBegin;
437   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
438   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
439     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
440   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
441     Mat aij;
442     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
443     ierr = MatView(aij,viewer);CHKERRQ(ierr);
444     ierr = MatDestroy(aij);CHKERRQ(ierr);
445   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
446 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
447      ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr);
448 #endif
449      PetscFunctionReturn(0);
450   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
451     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
452     for (i=0; i<a->mbs; i++) {
453       for (j=0; j<bs; j++) {
454         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
455         for (k=a->i[i]; k<a->i[i+1]; k++) {
456           for (l=0; l<bs; l++) {
457 #if defined(PETSC_USE_COMPLEX)
458             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
459               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
460                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
461             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
462               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
463                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
464             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
465               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
466             }
467 #else
468             if (a->a[bs2*k + l*bs + j] != 0.0) {
469               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
470             }
471 #endif
472           }
473         }
474         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
475       }
476     }
477     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
478   } else {
479     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
480     for (i=0; i<a->mbs; i++) {
481       for (j=0; j<bs; j++) {
482         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
483         for (k=a->i[i]; k<a->i[i+1]; k++) {
484           for (l=0; l<bs; l++) {
485 #if defined(PETSC_USE_COMPLEX)
486             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
487               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
488                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
489             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
490               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
491                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
492             } else {
493               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
494             }
495 #else
496             ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
497 #endif
498           }
499         }
500         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
501       }
502     }
503     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
504   }
505   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
506   PetscFunctionReturn(0);
507 }
508 
509 #undef __FUNCT__
510 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
511 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
512 {
513   Mat          A = (Mat) Aa;
514   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
515   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
516   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
517   MatScalar    *aa;
518   PetscViewer  viewer;
519 
520   PetscFunctionBegin;
521 
522   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
523   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
524 
525   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
526 
527   /* loop over matrix elements drawing boxes */
528   color = PETSC_DRAW_BLUE;
529   for (i=0,row=0; i<mbs; i++,row+=bs) {
530     for (j=a->i[i]; j<a->i[i+1]; j++) {
531       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
532       x_l = a->j[j]*bs; x_r = x_l + 1.0;
533       aa = a->a + j*bs2;
534       for (k=0; k<bs; k++) {
535         for (l=0; l<bs; l++) {
536           if (PetscRealPart(*aa++) >=  0.) continue;
537           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
538         }
539       }
540     }
541   }
542   color = PETSC_DRAW_CYAN;
543   for (i=0,row=0; i<mbs; i++,row+=bs) {
544     for (j=a->i[i]; j<a->i[i+1]; j++) {
545       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
546       x_l = a->j[j]*bs; x_r = x_l + 1.0;
547       aa = a->a + j*bs2;
548       for (k=0; k<bs; k++) {
549         for (l=0; l<bs; l++) {
550           if (PetscRealPart(*aa++) != 0.) continue;
551           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
552         }
553       }
554     }
555   }
556 
557   color = PETSC_DRAW_RED;
558   for (i=0,row=0; i<mbs; i++,row+=bs) {
559     for (j=a->i[i]; j<a->i[i+1]; j++) {
560       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
561       x_l = a->j[j]*bs; x_r = x_l + 1.0;
562       aa = a->a + j*bs2;
563       for (k=0; k<bs; k++) {
564         for (l=0; l<bs; l++) {
565           if (PetscRealPart(*aa++) <= 0.) continue;
566           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
567         }
568       }
569     }
570   }
571   PetscFunctionReturn(0);
572 }
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
576 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
577 {
578   int          ierr;
579   PetscReal    xl,yl,xr,yr,w,h;
580   PetscDraw    draw;
581   PetscTruth   isnull;
582 
583   PetscFunctionBegin;
584 
585   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
586   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
587 
588   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
589   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
590   xr += w;    yr += h;  xl = -w;     yl = -h;
591   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
592   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
593   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
594   PetscFunctionReturn(0);
595 }
596 
597 #undef __FUNCT__
598 #define __FUNCT__ "MatView_SeqBAIJ"
599 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
600 {
601   int        ierr;
602   PetscTruth issocket,isascii,isbinary,isdraw;
603 
604   PetscFunctionBegin;
605   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
606   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
607   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
608   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
609   if (issocket) {
610     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
611   } else if (isascii){
612     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
613   } else if (isbinary) {
614     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
615   } else if (isdraw) {
616     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
617   } else {
618     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
619   }
620   PetscFunctionReturn(0);
621 }
622 
623 
624 #undef __FUNCT__
625 #define __FUNCT__ "MatGetValues_SeqBAIJ"
626 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
627 {
628   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
629   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
630   int        *ai = a->i,*ailen = a->ilen;
631   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
632   MatScalar  *ap,*aa = a->a,zero = 0.0;
633 
634   PetscFunctionBegin;
635   for (k=0; k<m; k++) { /* loop over rows */
636     row  = im[k]; brow = row/bs;
637     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
638     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
639     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
640     nrow = ailen[brow];
641     for (l=0; l<n; l++) { /* loop over columns */
642       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
643       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
644       col  = in[l] ;
645       bcol = col/bs;
646       cidx = col%bs;
647       ridx = row%bs;
648       high = nrow;
649       low  = 0; /* assume unsorted */
650       while (high-low > 5) {
651         t = (low+high)/2;
652         if (rp[t] > bcol) high = t;
653         else             low  = t;
654       }
655       for (i=low; i<high; i++) {
656         if (rp[i] > bcol) break;
657         if (rp[i] == bcol) {
658           *v++ = ap[bs2*i+bs*cidx+ridx];
659           goto finished;
660         }
661       }
662       *v++ = zero;
663       finished:;
664     }
665   }
666   PetscFunctionReturn(0);
667 }
668 
669 #if defined(PETSC_USE_MAT_SINGLE)
670 #undef __FUNCT__
671 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
672 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
673 {
674   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
675   int         ierr,i,N = m*n*b->bs2;
676   MatScalar   *vsingle;
677 
678   PetscFunctionBegin;
679   if (N > b->setvalueslen) {
680     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
681     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
682     b->setvalueslen  = N;
683   }
684   vsingle = b->setvaluescopy;
685   for (i=0; i<N; i++) {
686     vsingle[i] = v[i];
687   }
688   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
689   PetscFunctionReturn(0);
690 }
691 #endif
692 
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
696 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
697 {
698   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
699   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
700   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
701   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
702   PetscTruth  roworiented=a->roworiented;
703   MatScalar   *value = v,*ap,*aa = a->a,*bap;
704 
705   PetscFunctionBegin;
706   if (roworiented) {
707     stepval = (n-1)*bs;
708   } else {
709     stepval = (m-1)*bs;
710   }
711   for (k=0; k<m; k++) { /* loop over added rows */
712     row  = im[k];
713     if (row < 0) continue;
714 #if defined(PETSC_USE_BOPT_g)
715     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
716 #endif
717     rp   = aj + ai[row];
718     ap   = aa + bs2*ai[row];
719     rmax = imax[row];
720     nrow = ailen[row];
721     low  = 0;
722     for (l=0; l<n; l++) { /* loop over added columns */
723       if (in[l] < 0) continue;
724 #if defined(PETSC_USE_BOPT_g)
725       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
726 #endif
727       col = in[l];
728       if (roworiented) {
729         value = v + k*(stepval+bs)*bs + l*bs;
730       } else {
731         value = v + l*(stepval+bs)*bs + k*bs;
732       }
733       if (!sorted) low = 0; high = nrow;
734       while (high-low > 7) {
735         t = (low+high)/2;
736         if (rp[t] > col) high = t;
737         else             low  = t;
738       }
739       for (i=low; i<high; i++) {
740         if (rp[i] > col) break;
741         if (rp[i] == col) {
742           bap  = ap +  bs2*i;
743           if (roworiented) {
744             if (is == ADD_VALUES) {
745               for (ii=0; ii<bs; ii++,value+=stepval) {
746                 for (jj=ii; jj<bs2; jj+=bs) {
747                   bap[jj] += *value++;
748                 }
749               }
750             } else {
751               for (ii=0; ii<bs; ii++,value+=stepval) {
752                 for (jj=ii; jj<bs2; jj+=bs) {
753                   bap[jj] = *value++;
754                 }
755               }
756             }
757           } else {
758             if (is == ADD_VALUES) {
759               for (ii=0; ii<bs; ii++,value+=stepval) {
760                 for (jj=0; jj<bs; jj++) {
761                   *bap++ += *value++;
762                 }
763               }
764             } else {
765               for (ii=0; ii<bs; ii++,value+=stepval) {
766                 for (jj=0; jj<bs; jj++) {
767                   *bap++  = *value++;
768                 }
769               }
770             }
771           }
772           goto noinsert2;
773         }
774       }
775       if (nonew == 1) goto noinsert2;
776       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
777       if (nrow >= rmax) {
778         /* there is no extra room in row, therefore enlarge */
779         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
780         MatScalar *new_a;
781 
782         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
783 
784         /* malloc new storage space */
785         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
786 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
787         new_j   = (int*)(new_a + bs2*new_nz);
788         new_i   = new_j + new_nz;
789 
790         /* copy over old data into new slots */
791         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
792         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
793         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
794         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
795         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
796         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
797         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
798         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
799         /* free up old matrix storage */
800         ierr = PetscFree(a->a);CHKERRQ(ierr);
801         if (!a->singlemalloc) {
802           ierr = PetscFree(a->i);CHKERRQ(ierr);
803           ierr = PetscFree(a->j);CHKERRQ(ierr);
804         }
805         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
806         a->singlemalloc = PETSC_TRUE;
807 
808         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
809         rmax = imax[row] = imax[row] + CHUNKSIZE;
810         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
811         a->maxnz += bs2*CHUNKSIZE;
812         a->reallocs++;
813         a->nz++;
814       }
815       N = nrow++ - 1;
816       /* shift up all the later entries in this row */
817       for (ii=N; ii>=i; ii--) {
818         rp[ii+1] = rp[ii];
819         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
820       }
821       if (N >= i) {
822         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
823       }
824       rp[i] = col;
825       bap   = ap +  bs2*i;
826       if (roworiented) {
827         for (ii=0; ii<bs; ii++,value+=stepval) {
828           for (jj=ii; jj<bs2; jj+=bs) {
829             bap[jj] = *value++;
830           }
831         }
832       } else {
833         for (ii=0; ii<bs; ii++,value+=stepval) {
834           for (jj=0; jj<bs; jj++) {
835             *bap++  = *value++;
836           }
837         }
838       }
839       noinsert2:;
840       low = i;
841     }
842     ailen[row] = nrow;
843   }
844   PetscFunctionReturn(0);
845 }
846 
847 #undef __FUNCT__
848 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
849 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
850 {
851   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
852   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
853   int        m = A->m,*ip,N,*ailen = a->ilen;
854   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
855   MatScalar  *aa = a->a,*ap;
856 #if defined(PETSC_HAVE_DSCPACK)
857   PetscTruth   flag;
858 #endif
859 
860   PetscFunctionBegin;
861   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
862 
863   if (m) rmax = ailen[0];
864   for (i=1; i<mbs; i++) {
865     /* move each row back by the amount of empty slots (fshift) before it*/
866     fshift += imax[i-1] - ailen[i-1];
867     rmax   = PetscMax(rmax,ailen[i]);
868     if (fshift) {
869       ip = aj + ai[i]; ap = aa + bs2*ai[i];
870       N = ailen[i];
871       for (j=0; j<N; j++) {
872         ip[j-fshift] = ip[j];
873         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
874       }
875     }
876     ai[i] = ai[i-1] + ailen[i-1];
877   }
878   if (mbs) {
879     fshift += imax[mbs-1] - ailen[mbs-1];
880     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
881   }
882   /* reset ilen and imax for each row */
883   for (i=0; i<mbs; i++) {
884     ailen[i] = imax[i] = ai[i+1] - ai[i];
885   }
886   a->nz = ai[mbs];
887 
888   /* diagonals may have moved, so kill the diagonal pointers */
889   if (fshift && a->diag) {
890     ierr = PetscFree(a->diag);CHKERRQ(ierr);
891     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
892     a->diag = 0;
893   }
894   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
895   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
896   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
897   a->reallocs          = 0;
898   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
899 
900 #if defined(PETSC_HAVE_DSCPACK)
901   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr);
902   if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); }
903 #endif
904 
905   PetscFunctionReturn(0);
906 }
907 
908 
909 
910 /*
911    This function returns an array of flags which indicate the locations of contiguous
912    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
913    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
914    Assume: sizes should be long enough to hold all the values.
915 */
916 #undef __FUNCT__
917 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
918 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
919 {
920   int        i,j,k,row;
921   PetscTruth flg;
922 
923   PetscFunctionBegin;
924   for (i=0,j=0; i<n; j++) {
925     row = idx[i];
926     if (row%bs!=0) { /* Not the begining of a block */
927       sizes[j] = 1;
928       i++;
929     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
930       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
931       i++;
932     } else { /* Begining of the block, so check if the complete block exists */
933       flg = PETSC_TRUE;
934       for (k=1; k<bs; k++) {
935         if (row+k != idx[i+k]) { /* break in the block */
936           flg = PETSC_FALSE;
937           break;
938         }
939       }
940       if (flg == PETSC_TRUE) { /* No break in the bs */
941         sizes[j] = bs;
942         i+= bs;
943       } else {
944         sizes[j] = 1;
945         i++;
946       }
947     }
948   }
949   *bs_max = j;
950   PetscFunctionReturn(0);
951 }
952 
953 #undef __FUNCT__
954 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
955 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag)
956 {
957   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
958   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
959   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
960   PetscScalar zero = 0.0;
961   MatScalar   *aa;
962 
963   PetscFunctionBegin;
964   /* Make a copy of the IS and  sort it */
965   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
966   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
967 
968   /* allocate memory for rows,sizes */
969   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
970   sizes = rows + is_n;
971 
972   /* copy IS values to rows, and sort them */
973   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
974   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
975   if (baij->keepzeroedrows) {
976     for (i=0; i<is_n; i++) { sizes[i] = 1; }
977     bs_max = is_n;
978   } else {
979     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
980   }
981   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
982 
983   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
984     row   = rows[j];
985     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
986     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
987     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
988     if (sizes[i] == bs && !baij->keepzeroedrows) {
989       if (diag) {
990         if (baij->ilen[row/bs] > 0) {
991           baij->ilen[row/bs]       = 1;
992           baij->j[baij->i[row/bs]] = row/bs;
993           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
994         }
995         /* Now insert all the diagonal values for this bs */
996         for (k=0; k<bs; k++) {
997           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
998         }
999       } else { /* (!diag) */
1000         baij->ilen[row/bs] = 0;
1001       } /* end (!diag) */
1002     } else { /* (sizes[i] != bs) */
1003 #if defined (PETSC_USE_DEBUG)
1004       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1005 #endif
1006       for (k=0; k<count; k++) {
1007         aa[0] =  zero;
1008         aa    += bs;
1009       }
1010       if (diag) {
1011         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1012       }
1013     }
1014   }
1015 
1016   ierr = PetscFree(rows);CHKERRQ(ierr);
1017   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 #undef __FUNCT__
1022 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1023 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
1024 {
1025   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1026   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1027   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1028   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1029   int         ridx,cidx,bs2=a->bs2,ierr;
1030   PetscTruth  roworiented=a->roworiented;
1031   MatScalar   *ap,value,*aa=a->a,*bap;
1032 
1033   PetscFunctionBegin;
1034   for (k=0; k<m; k++) { /* loop over added rows */
1035     row  = im[k]; brow = row/bs;
1036     if (row < 0) continue;
1037 #if defined(PETSC_USE_BOPT_g)
1038     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1039 #endif
1040     rp   = aj + ai[brow];
1041     ap   = aa + bs2*ai[brow];
1042     rmax = imax[brow];
1043     nrow = ailen[brow];
1044     low  = 0;
1045     for (l=0; l<n; l++) { /* loop over added columns */
1046       if (in[l] < 0) continue;
1047 #if defined(PETSC_USE_BOPT_g)
1048       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1049 #endif
1050       col = in[l]; bcol = col/bs;
1051       ridx = row % bs; cidx = col % bs;
1052       if (roworiented) {
1053         value = v[l + k*n];
1054       } else {
1055         value = v[k + l*m];
1056       }
1057       if (!sorted) low = 0; high = nrow;
1058       while (high-low > 7) {
1059         t = (low+high)/2;
1060         if (rp[t] > bcol) high = t;
1061         else              low  = t;
1062       }
1063       for (i=low; i<high; i++) {
1064         if (rp[i] > bcol) break;
1065         if (rp[i] == bcol) {
1066           bap  = ap +  bs2*i + bs*cidx + ridx;
1067           if (is == ADD_VALUES) *bap += value;
1068           else                  *bap  = value;
1069           goto noinsert1;
1070         }
1071       }
1072       if (nonew == 1) goto noinsert1;
1073       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1074       if (nrow >= rmax) {
1075         /* there is no extra room in row, therefore enlarge */
1076         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1077         MatScalar *new_a;
1078 
1079         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1080 
1081         /* Malloc new storage space */
1082         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1083 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1084         new_j   = (int*)(new_a + bs2*new_nz);
1085         new_i   = new_j + new_nz;
1086 
1087         /* copy over old data into new slots */
1088         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1089         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1090         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1091         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1092         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1093         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1094         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1095         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1096         /* free up old matrix storage */
1097         ierr = PetscFree(a->a);CHKERRQ(ierr);
1098         if (!a->singlemalloc) {
1099           ierr = PetscFree(a->i);CHKERRQ(ierr);
1100           ierr = PetscFree(a->j);CHKERRQ(ierr);
1101         }
1102         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1103         a->singlemalloc = PETSC_TRUE;
1104 
1105         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1106         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1107         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1108         a->maxnz += bs2*CHUNKSIZE;
1109         a->reallocs++;
1110         a->nz++;
1111       }
1112       N = nrow++ - 1;
1113       /* shift up all the later entries in this row */
1114       for (ii=N; ii>=i; ii--) {
1115         rp[ii+1] = rp[ii];
1116         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1117       }
1118       if (N>=i) {
1119         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1120       }
1121       rp[i]                      = bcol;
1122       ap[bs2*i + bs*cidx + ridx] = value;
1123       noinsert1:;
1124       low = i;
1125     }
1126     ailen[brow] = nrow;
1127   }
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 
1132 #undef __FUNCT__
1133 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1134 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1135 {
1136   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1137   Mat         outA;
1138   int         ierr;
1139   PetscTruth  row_identity,col_identity;
1140 
1141   PetscFunctionBegin;
1142   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1143   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1144   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1145   if (!row_identity || !col_identity) {
1146     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1147   }
1148 
1149   outA          = inA;
1150   inA->factor   = FACTOR_LU;
1151 
1152   if (!a->diag) {
1153     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1154   }
1155 
1156   a->row        = row;
1157   a->col        = col;
1158   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1159   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1160 
1161   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1162   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1163   PetscLogObjectParent(inA,a->icol);
1164 
1165   /*
1166       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1167       for ILU(0) factorization with natural ordering
1168   */
1169   if (a->bs < 8) {
1170     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1171   } else {
1172     if (!a->solve_work) {
1173       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1174       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1175     }
1176   }
1177 
1178   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1179 
1180   PetscFunctionReturn(0);
1181 }
1182 #undef __FUNCT__
1183 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1184 int MatPrintHelp_SeqBAIJ(Mat A)
1185 {
1186   static PetscTruth called = PETSC_FALSE;
1187   MPI_Comm          comm = A->comm;
1188   int               ierr;
1189 
1190   PetscFunctionBegin;
1191   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1192   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1193   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 EXTERN_C_BEGIN
1198 #undef __FUNCT__
1199 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1200 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1201 {
1202   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1203   int         i,nz,nbs;
1204 
1205   PetscFunctionBegin;
1206   nz  = baij->maxnz/baij->bs2;
1207   nbs = baij->nbs;
1208   for (i=0; i<nz; i++) {
1209     baij->j[i] = indices[i];
1210   }
1211   baij->nz = nz;
1212   for (i=0; i<nbs; i++) {
1213     baij->ilen[i] = baij->imax[i];
1214   }
1215 
1216   PetscFunctionReturn(0);
1217 }
1218 EXTERN_C_END
1219 
1220 #undef __FUNCT__
1221 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1222 /*@
1223     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1224        in the matrix.
1225 
1226   Input Parameters:
1227 +  mat - the SeqBAIJ matrix
1228 -  indices - the column indices
1229 
1230   Level: advanced
1231 
1232   Notes:
1233     This can be called if you have precomputed the nonzero structure of the
1234   matrix and want to provide it to the matrix object to improve the performance
1235   of the MatSetValues() operation.
1236 
1237     You MUST have set the correct numbers of nonzeros per row in the call to
1238   MatCreateSeqBAIJ().
1239 
1240     MUST be called before any calls to MatSetValues();
1241 
1242 @*/
1243 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1244 {
1245   int ierr,(*f)(Mat,int *);
1246 
1247   PetscFunctionBegin;
1248   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1249   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1250   if (f) {
1251     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1252   } else {
1253     SETERRQ(1,"Wrong type of matrix to set column indices");
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1260 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1261 {
1262   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1263   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1264   PetscReal    atmp;
1265   PetscScalar  *x,zero = 0.0;
1266   MatScalar    *aa;
1267   int          ncols,brow,krow,kcol;
1268 
1269   PetscFunctionBegin;
1270   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1271   bs   = a->bs;
1272   aa   = a->a;
1273   ai   = a->i;
1274   aj   = a->j;
1275   mbs = a->mbs;
1276 
1277   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1278   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1279   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1280   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1281   for (i=0; i<mbs; i++) {
1282     ncols = ai[1] - ai[0]; ai++;
1283     brow  = bs*i;
1284     for (j=0; j<ncols; j++){
1285       /* bcol = bs*(*aj); */
1286       for (kcol=0; kcol<bs; kcol++){
1287         for (krow=0; krow<bs; krow++){
1288           atmp = PetscAbsScalar(*aa); aa++;
1289           row = brow + krow;    /* row index */
1290           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1291           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1292         }
1293       }
1294       aj++;
1295     }
1296   }
1297   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1298   PetscFunctionReturn(0);
1299 }
1300 
1301 #undef __FUNCT__
1302 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1303 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1304 {
1305   int        ierr;
1306 
1307   PetscFunctionBegin;
1308   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1314 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array)
1315 {
1316   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1317   PetscFunctionBegin;
1318   *array = a->a;
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1324 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array)
1325 {
1326   PetscFunctionBegin;
1327   PetscFunctionReturn(0);
1328 }
1329 
1330 /* -------------------------------------------------------------------*/
1331 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1332        MatGetRow_SeqBAIJ,
1333        MatRestoreRow_SeqBAIJ,
1334        MatMult_SeqBAIJ_N,
1335        MatMultAdd_SeqBAIJ_N,
1336        MatMultTranspose_SeqBAIJ,
1337        MatMultTransposeAdd_SeqBAIJ,
1338        MatSolve_SeqBAIJ_N,
1339        0,
1340        0,
1341        0,
1342        MatLUFactor_SeqBAIJ,
1343        0,
1344        0,
1345        MatTranspose_SeqBAIJ,
1346        MatGetInfo_SeqBAIJ,
1347        MatEqual_SeqBAIJ,
1348        MatGetDiagonal_SeqBAIJ,
1349        MatDiagonalScale_SeqBAIJ,
1350        MatNorm_SeqBAIJ,
1351        0,
1352        MatAssemblyEnd_SeqBAIJ,
1353        0,
1354        MatSetOption_SeqBAIJ,
1355        MatZeroEntries_SeqBAIJ,
1356        MatZeroRows_SeqBAIJ,
1357        MatLUFactorSymbolic_SeqBAIJ,
1358        MatLUFactorNumeric_SeqBAIJ_N,
1359        0,
1360        0,
1361        MatSetUpPreallocation_SeqBAIJ,
1362        MatILUFactorSymbolic_SeqBAIJ,
1363        0,
1364        MatGetArray_SeqBAIJ,
1365        MatRestoreArray_SeqBAIJ,
1366        MatDuplicate_SeqBAIJ,
1367        0,
1368        0,
1369        MatILUFactor_SeqBAIJ,
1370        0,
1371        0,
1372        MatGetSubMatrices_SeqBAIJ,
1373        MatIncreaseOverlap_SeqBAIJ,
1374        MatGetValues_SeqBAIJ,
1375        0,
1376        MatPrintHelp_SeqBAIJ,
1377        MatScale_SeqBAIJ,
1378        0,
1379        0,
1380        0,
1381        MatGetBlockSize_SeqBAIJ,
1382        MatGetRowIJ_SeqBAIJ,
1383        MatRestoreRowIJ_SeqBAIJ,
1384        0,
1385        0,
1386        0,
1387        0,
1388        0,
1389        0,
1390        MatSetValuesBlocked_SeqBAIJ,
1391        MatGetSubMatrix_SeqBAIJ,
1392        MatDestroy_SeqBAIJ,
1393        MatView_SeqBAIJ,
1394        MatGetPetscMaps_Petsc,
1395        0,
1396        0,
1397        0,
1398        0,
1399        0,
1400        0,
1401        MatGetRowMax_SeqBAIJ,
1402        MatConvert_Basic};
1403 
1404 EXTERN_C_BEGIN
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1407 int MatStoreValues_SeqBAIJ(Mat mat)
1408 {
1409   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1410   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1411   int         ierr;
1412 
1413   PetscFunctionBegin;
1414   if (aij->nonew != 1) {
1415     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1416   }
1417 
1418   /* allocate space for values if not already there */
1419   if (!aij->saved_values) {
1420     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1421   }
1422 
1423   /* copy values over */
1424   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 EXTERN_C_END
1428 
1429 EXTERN_C_BEGIN
1430 #undef __FUNCT__
1431 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1432 int MatRetrieveValues_SeqBAIJ(Mat mat)
1433 {
1434   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1435   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1436 
1437   PetscFunctionBegin;
1438   if (aij->nonew != 1) {
1439     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1440   }
1441   if (!aij->saved_values) {
1442     SETERRQ(1,"Must call MatStoreValues(A);first");
1443   }
1444 
1445   /* copy values over */
1446   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1447   PetscFunctionReturn(0);
1448 }
1449 EXTERN_C_END
1450 
1451 EXTERN_C_BEGIN
1452 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1453 EXTERN_C_END
1454 
1455 EXTERN_C_BEGIN
1456 #undef __FUNCT__
1457 #define __FUNCT__ "MatCreate_SeqBAIJ"
1458 int MatCreate_SeqBAIJ(Mat B)
1459 {
1460   int         ierr,size;
1461   Mat_SeqBAIJ *b;
1462 
1463   PetscFunctionBegin;
1464   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1465   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1466 
1467   B->m = B->M = PetscMax(B->m,B->M);
1468   B->n = B->N = PetscMax(B->n,B->N);
1469   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1470   B->data = (void*)b;
1471   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1472   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1473   B->factor           = 0;
1474   B->lupivotthreshold = 1.0;
1475   B->mapping          = 0;
1476   b->row              = 0;
1477   b->col              = 0;
1478   b->icol             = 0;
1479   b->reallocs         = 0;
1480   b->saved_values     = 0;
1481 #if defined(PETSC_USE_MAT_SINGLE)
1482   b->setvalueslen     = 0;
1483   b->setvaluescopy    = PETSC_NULL;
1484 #endif
1485   b->single_precision_solves = PETSC_FALSE;
1486 
1487   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1488   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1489 
1490   b->sorted           = PETSC_FALSE;
1491   b->roworiented      = PETSC_TRUE;
1492   b->nonew            = 0;
1493   b->diag             = 0;
1494   b->solve_work       = 0;
1495   b->mult_work        = 0;
1496   B->spptr            = 0;
1497   B->info.nz_unneeded = (PetscReal)b->maxnz;
1498   b->keepzeroedrows   = PETSC_FALSE;
1499 
1500   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1501                                      "MatStoreValues_SeqBAIJ",
1502                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1503   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1504                                      "MatRetrieveValues_SeqBAIJ",
1505                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1506   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1507                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1508                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1509   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1510                                      "MatConvert_SeqBAIJ_SeqAIJ",
1511                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1512   PetscFunctionReturn(0);
1513 }
1514 EXTERN_C_END
1515 
1516 #undef __FUNCT__
1517 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1518 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1519 {
1520   Mat         C;
1521   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1522   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1523 
1524   PetscFunctionBegin;
1525   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1526 
1527   *B = 0;
1528   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1529   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1530   c    = (Mat_SeqBAIJ*)C->data;
1531 
1532   c->bs         = a->bs;
1533   c->bs2        = a->bs2;
1534   c->mbs        = a->mbs;
1535   c->nbs        = a->nbs;
1536   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1537 
1538   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1539   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1540   for (i=0; i<mbs; i++) {
1541     c->imax[i] = a->imax[i];
1542     c->ilen[i] = a->ilen[i];
1543   }
1544 
1545   /* allocate the matrix space */
1546   c->singlemalloc = PETSC_TRUE;
1547   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1548   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1549   c->j = (int*)(c->a + nz*bs2);
1550   c->i = c->j + nz;
1551   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1552   if (mbs > 0) {
1553     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1554     if (cpvalues == MAT_COPY_VALUES) {
1555       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1556     } else {
1557       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1558     }
1559   }
1560 
1561   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1562   c->sorted      = a->sorted;
1563   c->roworiented = a->roworiented;
1564   c->nonew       = a->nonew;
1565 
1566   if (a->diag) {
1567     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1568     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1569     for (i=0; i<mbs; i++) {
1570       c->diag[i] = a->diag[i];
1571     }
1572   } else c->diag        = 0;
1573   c->nz                 = a->nz;
1574   c->maxnz              = a->maxnz;
1575   c->solve_work         = 0;
1576   C->spptr              = 0;     /* Dangerous -I'm throwing away a->spptr */
1577   c->mult_work          = 0;
1578   C->preallocated       = PETSC_TRUE;
1579   C->assembled          = PETSC_TRUE;
1580   *B = C;
1581   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1582   PetscFunctionReturn(0);
1583 }
1584 
1585 EXTERN_C_BEGIN
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MatLoad_SeqBAIJ"
1588 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1589 {
1590   Mat_SeqBAIJ  *a;
1591   Mat          B;
1592   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1593   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1594   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1595   int          *masked,nmask,tmp,bs2,ishift;
1596   PetscScalar  *aa;
1597   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1598 
1599   PetscFunctionBegin;
1600   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1601   bs2  = bs*bs;
1602 
1603   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1604   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1605   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1606   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1607   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1608   M = header[1]; N = header[2]; nz = header[3];
1609 
1610   if (header[3] < 0) {
1611     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1612   }
1613 
1614   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1615 
1616   /*
1617      This code adds extra rows to make sure the number of rows is
1618     divisible by the blocksize
1619   */
1620   mbs        = M/bs;
1621   extra_rows = bs - M + bs*(mbs);
1622   if (extra_rows == bs) extra_rows = 0;
1623   else                  mbs++;
1624   if (extra_rows) {
1625     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1626   }
1627 
1628   /* read in row lengths */
1629   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1630   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1631   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1632 
1633   /* read in column indices */
1634   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1635   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1636   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1637 
1638   /* loop over row lengths determining block row lengths */
1639   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1640   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1641   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1642   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1643   masked   = mask + mbs;
1644   rowcount = 0; nzcount = 0;
1645   for (i=0; i<mbs; i++) {
1646     nmask = 0;
1647     for (j=0; j<bs; j++) {
1648       kmax = rowlengths[rowcount];
1649       for (k=0; k<kmax; k++) {
1650         tmp = jj[nzcount++]/bs;
1651         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1652       }
1653       rowcount++;
1654     }
1655     browlengths[i] += nmask;
1656     /* zero out the mask elements we set */
1657     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1658   }
1659 
1660   /* create our matrix */
1661   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1662   B = *A;
1663   a = (Mat_SeqBAIJ*)B->data;
1664 
1665   /* set matrix "i" values */
1666   a->i[0] = 0;
1667   for (i=1; i<= mbs; i++) {
1668     a->i[i]      = a->i[i-1] + browlengths[i-1];
1669     a->ilen[i-1] = browlengths[i-1];
1670   }
1671   a->nz         = 0;
1672   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1673 
1674   /* read in nonzero values */
1675   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1676   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1677   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1678 
1679   /* set "a" and "j" values into matrix */
1680   nzcount = 0; jcount = 0;
1681   for (i=0; i<mbs; i++) {
1682     nzcountb = nzcount;
1683     nmask    = 0;
1684     for (j=0; j<bs; j++) {
1685       kmax = rowlengths[i*bs+j];
1686       for (k=0; k<kmax; k++) {
1687         tmp = jj[nzcount++]/bs;
1688 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1689       }
1690     }
1691     /* sort the masked values */
1692     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1693 
1694     /* set "j" values into matrix */
1695     maskcount = 1;
1696     for (j=0; j<nmask; j++) {
1697       a->j[jcount++]  = masked[j];
1698       mask[masked[j]] = maskcount++;
1699     }
1700     /* set "a" values into matrix */
1701     ishift = bs2*a->i[i];
1702     for (j=0; j<bs; j++) {
1703       kmax = rowlengths[i*bs+j];
1704       for (k=0; k<kmax; k++) {
1705         tmp       = jj[nzcountb]/bs ;
1706         block     = mask[tmp] - 1;
1707         point     = jj[nzcountb] - bs*tmp;
1708         idx       = ishift + bs2*block + j + bs*point;
1709         a->a[idx] = (MatScalar)aa[nzcountb++];
1710       }
1711     }
1712     /* zero out the mask elements we set */
1713     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1714   }
1715   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1716 
1717   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1718   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1719   ierr = PetscFree(aa);CHKERRQ(ierr);
1720   ierr = PetscFree(jj);CHKERRQ(ierr);
1721   ierr = PetscFree(mask);CHKERRQ(ierr);
1722 
1723   B->assembled = PETSC_TRUE;
1724 
1725   ierr = MatView_Private(B);CHKERRQ(ierr);
1726   PetscFunctionReturn(0);
1727 }
1728 EXTERN_C_END
1729 
1730 #undef __FUNCT__
1731 #define __FUNCT__ "MatCreateSeqBAIJ"
1732 /*@C
1733    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1734    compressed row) format.  For good matrix assembly performance the
1735    user should preallocate the matrix storage by setting the parameter nz
1736    (or the array nnz).  By setting these parameters accurately, performance
1737    during matrix assembly can be increased by more than a factor of 50.
1738 
1739    Collective on MPI_Comm
1740 
1741    Input Parameters:
1742 +  comm - MPI communicator, set to PETSC_COMM_SELF
1743 .  bs - size of block
1744 .  m - number of rows
1745 .  n - number of columns
1746 .  nz - number of nonzero blocks  per block row (same for all rows)
1747 -  nnz - array containing the number of nonzero blocks in the various block rows
1748          (possibly different for each block row) or PETSC_NULL
1749 
1750    Output Parameter:
1751 .  A - the matrix
1752 
1753    Options Database Keys:
1754 .   -mat_no_unroll - uses code that does not unroll the loops in the
1755                      block calculations (much slower)
1756 .    -mat_block_size - size of the blocks to use
1757 
1758    Level: intermediate
1759 
1760    Notes:
1761    A nonzero block is any block that as 1 or more nonzeros in it
1762 
1763    The block AIJ format is fully compatible with standard Fortran 77
1764    storage.  That is, the stored row and column indices can begin at
1765    either one (as in Fortran) or zero.  See the users' manual for details.
1766 
1767    Specify the preallocated storage with either nz or nnz (not both).
1768    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1769    allocation.  For additional details, see the users manual chapter on
1770    matrices.
1771 
1772 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1773 @*/
1774 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1775 {
1776   int ierr;
1777 
1778   PetscFunctionBegin;
1779   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1780   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
1781   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1782   PetscFunctionReturn(0);
1783 }
1784 
1785 #undef __FUNCT__
1786 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
1787 /*@C
1788    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1789    per row in the matrix. For good matrix assembly performance the
1790    user should preallocate the matrix storage by setting the parameter nz
1791    (or the array nnz).  By setting these parameters accurately, performance
1792    during matrix assembly can be increased by more than a factor of 50.
1793 
1794    Collective on MPI_Comm
1795 
1796    Input Parameters:
1797 +  A - the matrix
1798 .  bs - size of block
1799 .  nz - number of block nonzeros per block row (same for all rows)
1800 -  nnz - array containing the number of block nonzeros in the various block rows
1801          (possibly different for each block row) or PETSC_NULL
1802 
1803    Options Database Keys:
1804 .   -mat_no_unroll - uses code that does not unroll the loops in the
1805                      block calculations (much slower)
1806 .    -mat_block_size - size of the blocks to use
1807 
1808    Level: intermediate
1809 
1810    Notes:
1811    The block AIJ format is fully compatible with standard Fortran 77
1812    storage.  That is, the stored row and column indices can begin at
1813    either one (as in Fortran) or zero.  See the users' manual for details.
1814 
1815    Specify the preallocated storage with either nz or nnz (not both).
1816    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1817    allocation.  For additional details, see the users manual chapter on
1818    matrices.
1819 
1820 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1821 @*/
1822 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1823 {
1824   Mat_SeqBAIJ *b;
1825   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1826   PetscTruth  flg;
1827 
1828   PetscFunctionBegin;
1829   ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr);
1830   if (!flg) PetscFunctionReturn(0);
1831 
1832   B->preallocated = PETSC_TRUE;
1833   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1834   if (nnz && newbs != bs) {
1835     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1836   }
1837   bs   = newbs;
1838 
1839   mbs  = B->m/bs;
1840   nbs  = B->n/bs;
1841   bs2  = bs*bs;
1842 
1843   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1844     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1845   }
1846 
1847   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1848   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1849   if (nnz) {
1850     for (i=0; i<mbs; i++) {
1851       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1852       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1853     }
1854   }
1855 
1856   b       = (Mat_SeqBAIJ*)B->data;
1857   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1858   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1859   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1860   if (!flg) {
1861     switch (bs) {
1862     case 1:
1863       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1864       B->ops->mult            = MatMult_SeqBAIJ_1;
1865       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1866       break;
1867     case 2:
1868       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1869       B->ops->mult            = MatMult_SeqBAIJ_2;
1870       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1871       break;
1872     case 3:
1873       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1874       B->ops->mult            = MatMult_SeqBAIJ_3;
1875       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1876       break;
1877     case 4:
1878       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1879       B->ops->mult            = MatMult_SeqBAIJ_4;
1880       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1881       break;
1882     case 5:
1883       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1884       B->ops->mult            = MatMult_SeqBAIJ_5;
1885       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1886       break;
1887     case 6:
1888       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1889       B->ops->mult            = MatMult_SeqBAIJ_6;
1890       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1891       break;
1892     case 7:
1893       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1894       B->ops->mult            = MatMult_SeqBAIJ_7;
1895       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1896       break;
1897     default:
1898       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1899       B->ops->mult            = MatMult_SeqBAIJ_N;
1900       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1901       break;
1902     }
1903   }
1904   b->bs      = bs;
1905   b->mbs     = mbs;
1906   b->nbs     = nbs;
1907   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1908   if (!nnz) {
1909     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1910     else if (nz <= 0)        nz = 1;
1911     for (i=0; i<mbs; i++) b->imax[i] = nz;
1912     nz = nz*mbs;
1913   } else {
1914     nz = 0;
1915     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1916   }
1917 
1918   /* allocate the matrix space */
1919   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1920   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1921   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1922   b->j  = (int*)(b->a + nz*bs2);
1923   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1924   b->i  = b->j + nz;
1925   b->singlemalloc = PETSC_TRUE;
1926 
1927   b->i[0] = 0;
1928   for (i=1; i<mbs+1; i++) {
1929     b->i[i] = b->i[i-1] + b->imax[i-1];
1930   }
1931 
1932   /* b->ilen will count nonzeros in each block row so far. */
1933   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1934   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1935   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1936 
1937   b->bs               = bs;
1938   b->bs2              = bs2;
1939   b->mbs              = mbs;
1940   b->nz               = 0;
1941   b->maxnz            = nz*bs2;
1942   B->info.nz_unneeded = (PetscReal)b->maxnz;
1943   PetscFunctionReturn(0);
1944 }
1945 
1946