xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 2f2cbf6d06d0509fc30bbb0fccbdb1461a4f72a9)
1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 /*
12     Special version for Fun3d sequential benchmark
13 */
14 #if defined(PETSC_HAVE_FORTRAN_CAPS)
15 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
16 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
17 #define matsetvaluesblocked4_ matsetvaluesblocked4
18 #endif
19 
20 EXTERN_C_BEGIN
21 #undef __FUNCT__
22 #define __FUNCT__ "matsetvaluesblocked4_"
23 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[])
24 {
25   Mat               A = *AA;
26   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
27   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
28   int               *ai=a->i,*ailen=a->ilen;
29   int               *aj=a->j,stepval;
30   const PetscScalar *value = v;
31   MatScalar         *ap,*aa = a->a,*bap;
32 
33   PetscFunctionBegin;
34   stepval = (n-1)*4;
35   for (k=0; k<m; k++) { /* loop over added rows */
36     row  = im[k];
37     rp   = aj + ai[row];
38     ap   = aa + 16*ai[row];
39     nrow = ailen[row];
40     low  = 0;
41     for (l=0; l<n; l++) { /* loop over added columns */
42       col = in[l];
43       value = v + k*(stepval+4)*4 + l*4;
44       low = 0; high = nrow;
45       while (high-low > 7) {
46         t = (low+high)/2;
47         if (rp[t] > col) high = t;
48         else             low  = t;
49       }
50       for (i=low; i<high; i++) {
51         if (rp[i] > col) break;
52         if (rp[i] == col) {
53           bap  = ap +  16*i;
54           for (ii=0; ii<4; ii++,value+=stepval) {
55             for (jj=ii; jj<16; jj+=4) {
56               bap[jj] += *value++;
57             }
58           }
59           goto noinsert2;
60         }
61       }
62       N = nrow++ - 1;
63       /* shift up all the later entries in this row */
64       for (ii=N; ii>=i; ii--) {
65         rp[ii+1] = rp[ii];
66         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
67       }
68       if (N >= i) {
69         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
70       }
71       rp[i] = col;
72       bap   = ap +  16*i;
73       for (ii=0; ii<4; ii++,value+=stepval) {
74         for (jj=ii; jj<16; jj+=4) {
75           bap[jj] = *value++;
76         }
77       }
78       noinsert2:;
79       low = i;
80     }
81     ailen[row] = nrow;
82   }
83 }
84 EXTERN_C_END
85 
86 #if defined(PETSC_HAVE_FORTRAN_CAPS)
87 #define matsetvalues4_ MATSETVALUES4
88 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
89 #define matsetvalues4_ matsetvalues4
90 #endif
91 
92 EXTERN_C_BEGIN
93 #undef __FUNCT__
94 #define __FUNCT__ "MatSetValues4_"
95 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
96 {
97   Mat         A = *AA;
98   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
99   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
100   int         *ai=a->i,*ailen=a->ilen;
101   int         *aj=a->j,brow,bcol;
102   int         ridx,cidx;
103   MatScalar   *ap,value,*aa=a->a,*bap;
104 
105   PetscFunctionBegin;
106   for (k=0; k<m; k++) { /* loop over added rows */
107     row  = im[k]; brow = row/4;
108     rp   = aj + ai[brow];
109     ap   = aa + 16*ai[brow];
110     nrow = ailen[brow];
111     low  = 0;
112     for (l=0; l<n; l++) { /* loop over added columns */
113       col = in[l]; bcol = col/4;
114       ridx = row % 4; cidx = col % 4;
115       value = v[l + k*n];
116       low = 0; high = nrow;
117       while (high-low > 7) {
118         t = (low+high)/2;
119         if (rp[t] > bcol) high = t;
120         else              low  = t;
121       }
122       for (i=low; i<high; i++) {
123         if (rp[i] > bcol) break;
124         if (rp[i] == bcol) {
125           bap  = ap +  16*i + 4*cidx + ridx;
126           *bap += value;
127           goto noinsert1;
128         }
129       }
130       N = nrow++ - 1;
131       /* shift up all the later entries in this row */
132       for (ii=N; ii>=i; ii--) {
133         rp[ii+1] = rp[ii];
134         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
135       }
136       if (N>=i) {
137         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
138       }
139       rp[i]                    = bcol;
140       ap[16*i + 4*cidx + ridx] = value;
141       noinsert1:;
142       low = i;
143     }
144     ailen[brow] = nrow;
145   }
146 }
147 EXTERN_C_END
148 
149 /*  UGLY, ugly, ugly
150    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
151    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
152    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
153    converts the entries into single precision and then calls ..._MatScalar() to put them
154    into the single precision data structures.
155 */
156 #if defined(PETSC_USE_MAT_SINGLE)
157 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
158 #else
159 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
160 #endif
161 
162 #define CHUNKSIZE  10
163 
164 /*
165      Checks for missing diagonals
166 */
167 #undef __FUNCT__
168 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
169 int MatMissingDiagonal_SeqBAIJ(Mat A)
170 {
171   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
172   int         *diag,*jj = a->j,i,ierr;
173 
174   PetscFunctionBegin;
175   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
176   diag = a->diag;
177   for (i=0; i<a->mbs; i++) {
178     if (jj[diag[i]] != i) {
179       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
180     }
181   }
182   PetscFunctionReturn(0);
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
187 int MatMarkDiagonal_SeqBAIJ(Mat A)
188 {
189   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
190   int         i,j,*diag,m = a->mbs,ierr;
191 
192   PetscFunctionBegin;
193   if (a->diag) PetscFunctionReturn(0);
194 
195   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
196   PetscLogObjectMemory(A,(m+1)*sizeof(int));
197   for (i=0; i<m; i++) {
198     diag[i] = a->i[i+1];
199     for (j=a->i[i]; j<a->i[i+1]; j++) {
200       if (a->j[j] == i) {
201         diag[i] = j;
202         break;
203       }
204     }
205   }
206   a->diag = diag;
207   PetscFunctionReturn(0);
208 }
209 
210 
211 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
212 
213 #undef __FUNCT__
214 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
215 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
216 {
217   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
218   int         ierr,n = a->mbs,i;
219 
220   PetscFunctionBegin;
221   *nn = n;
222   if (!ia) PetscFunctionReturn(0);
223   if (symmetric) {
224     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
225   } else if (oshift == 1) {
226     /* temporarily add 1 to i and j indices */
227     int nz = a->i[n];
228     for (i=0; i<nz; i++) a->j[i]++;
229     for (i=0; i<n+1; i++) a->i[i]++;
230     *ia = a->i; *ja = a->j;
231   } else {
232     *ia = a->i; *ja = a->j;
233   }
234 
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
240 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
241 {
242   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
243   int         i,n = a->mbs,ierr;
244 
245   PetscFunctionBegin;
246   if (!ia) PetscFunctionReturn(0);
247   if (symmetric) {
248     ierr = PetscFree(*ia);CHKERRQ(ierr);
249     ierr = PetscFree(*ja);CHKERRQ(ierr);
250   } else if (oshift == 1) {
251     int nz = a->i[n]-1;
252     for (i=0; i<nz; i++) a->j[i]--;
253     for (i=0; i<n+1; i++) a->i[i]--;
254   }
255   PetscFunctionReturn(0);
256 }
257 
258 #undef __FUNCT__
259 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
260 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
261 {
262   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
263 
264   PetscFunctionBegin;
265   *bs = baij->bs;
266   PetscFunctionReturn(0);
267 }
268 
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatDestroy_SeqBAIJ"
272 int MatDestroy_SeqBAIJ(Mat A)
273 {
274   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
275   int         ierr;
276 
277   PetscFunctionBegin;
278 #if defined(PETSC_USE_LOG)
279   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
280 #endif
281   ierr = PetscFree(a->a);CHKERRQ(ierr);
282   if (!a->singlemalloc) {
283     ierr = PetscFree(a->i);CHKERRQ(ierr);
284     ierr = PetscFree(a->j);CHKERRQ(ierr);
285   }
286   if (a->row) {
287     ierr = ISDestroy(a->row);CHKERRQ(ierr);
288   }
289   if (a->col) {
290     ierr = ISDestroy(a->col);CHKERRQ(ierr);
291   }
292   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
293   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
294   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
295   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
296   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
297   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
298   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
299 #if defined(PETSC_USE_MAT_SINGLE)
300   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
301 #endif
302   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
303 
304   ierr = PetscFree(a);CHKERRQ(ierr);
305   PetscFunctionReturn(0);
306 }
307 
308 #undef __FUNCT__
309 #define __FUNCT__ "MatSetOption_SeqBAIJ"
310 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
311 {
312   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
313 
314   PetscFunctionBegin;
315   switch (op) {
316   case MAT_ROW_ORIENTED:
317     a->roworiented    = PETSC_TRUE;
318     break;
319   case MAT_COLUMN_ORIENTED:
320     a->roworiented    = PETSC_FALSE;
321     break;
322   case MAT_COLUMNS_SORTED:
323     a->sorted         = PETSC_TRUE;
324     break;
325   case MAT_COLUMNS_UNSORTED:
326     a->sorted         = PETSC_FALSE;
327     break;
328   case MAT_KEEP_ZEROED_ROWS:
329     a->keepzeroedrows = PETSC_TRUE;
330     break;
331   case MAT_NO_NEW_NONZERO_LOCATIONS:
332     a->nonew          = 1;
333     break;
334   case MAT_NEW_NONZERO_LOCATION_ERR:
335     a->nonew          = -1;
336     break;
337   case MAT_NEW_NONZERO_ALLOCATION_ERR:
338     a->nonew          = -2;
339     break;
340   case MAT_YES_NEW_NONZERO_LOCATIONS:
341     a->nonew          = 0;
342     break;
343   case MAT_ROWS_SORTED:
344   case MAT_ROWS_UNSORTED:
345   case MAT_YES_NEW_DIAGONALS:
346   case MAT_IGNORE_OFF_PROC_ENTRIES:
347   case MAT_USE_HASH_TABLE:
348     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
349     break;
350   case MAT_NO_NEW_DIAGONALS:
351     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
352   case MAT_SYMMETRIC:
353   case MAT_STRUCTURALLY_SYMMETRIC:
354   case MAT_NOT_SYMMETRIC:
355   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
356   case MAT_HERMITIAN:
357   case MAT_NOT_HERMITIAN:
358   case MAT_SYMMETRY_ETERNAL:
359   case MAT_NOT_SYMMETRY_ETERNAL:
360     break;
361   default:
362     SETERRQ(PETSC_ERR_SUP,"unknown option");
363   }
364   PetscFunctionReturn(0);
365 }
366 
367 #undef __FUNCT__
368 #define __FUNCT__ "MatGetRow_SeqBAIJ"
369 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
370 {
371   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
372   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
373   MatScalar    *aa,*aa_i;
374   PetscScalar  *v_i;
375 
376   PetscFunctionBegin;
377   bs  = a->bs;
378   ai  = a->i;
379   aj  = a->j;
380   aa  = a->a;
381   bs2 = a->bs2;
382 
383   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row);
384 
385   bn  = row/bs;   /* Block number */
386   bp  = row % bs; /* Block Position */
387   M   = ai[bn+1] - ai[bn];
388   *nz = bs*M;
389 
390   if (v) {
391     *v = 0;
392     if (*nz) {
393       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
394       for (i=0; i<M; i++) { /* for each block in the block row */
395         v_i  = *v + i*bs;
396         aa_i = aa + bs2*(ai[bn] + i);
397         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
398       }
399     }
400   }
401 
402   if (idx) {
403     *idx = 0;
404     if (*nz) {
405       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
406       for (i=0; i<M; i++) { /* for each block in the block row */
407         idx_i = *idx + i*bs;
408         itmp  = bs*aj[ai[bn] + i];
409         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
410       }
411     }
412   }
413   PetscFunctionReturn(0);
414 }
415 
416 #undef __FUNCT__
417 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
418 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
419 {
420   int ierr;
421 
422   PetscFunctionBegin;
423   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
424   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatTranspose_SeqBAIJ"
430 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
431 {
432   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
433   Mat         C;
434   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
435   int         *rows,*cols,bs2=a->bs2;
436   PetscScalar *array;
437 
438   PetscFunctionBegin;
439   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
440   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
441   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
442 
443 #if defined(PETSC_USE_MAT_SINGLE)
444   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
445   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
446 #else
447   array = a->a;
448 #endif
449 
450   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
451   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
452   ierr = PetscFree(col);CHKERRQ(ierr);
453   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
454   cols = rows + bs;
455   for (i=0; i<mbs; i++) {
456     cols[0] = i*bs;
457     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
458     len = ai[i+1] - ai[i];
459     for (j=0; j<len; j++) {
460       rows[0] = (*aj++)*bs;
461       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
462       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
463       array += bs2;
464     }
465   }
466   ierr = PetscFree(rows);CHKERRQ(ierr);
467 #if defined(PETSC_USE_MAT_SINGLE)
468   ierr = PetscFree(array);
469 #endif
470 
471   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
472   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
473 
474   if (B) {
475     *B = C;
476   } else {
477     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
478   }
479   PetscFunctionReturn(0);
480 }
481 
482 #undef __FUNCT__
483 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
484 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
485 {
486   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
487   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
488   PetscScalar *aa;
489   FILE        *file;
490 
491   PetscFunctionBegin;
492   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
493   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
494   col_lens[0] = MAT_FILE_COOKIE;
495 
496   col_lens[1] = A->m;
497   col_lens[2] = A->n;
498   col_lens[3] = a->nz*bs2;
499 
500   /* store lengths of each row and write (including header) to file */
501   count = 0;
502   for (i=0; i<a->mbs; i++) {
503     for (j=0; j<bs; j++) {
504       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
505     }
506   }
507   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
508   ierr = PetscFree(col_lens);CHKERRQ(ierr);
509 
510   /* store column indices (zero start index) */
511   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
512   count = 0;
513   for (i=0; i<a->mbs; i++) {
514     for (j=0; j<bs; j++) {
515       for (k=a->i[i]; k<a->i[i+1]; k++) {
516         for (l=0; l<bs; l++) {
517           jj[count++] = bs*a->j[k] + l;
518         }
519       }
520     }
521   }
522   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
523   ierr = PetscFree(jj);CHKERRQ(ierr);
524 
525   /* store nonzero values */
526   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
527   count = 0;
528   for (i=0; i<a->mbs; i++) {
529     for (j=0; j<bs; j++) {
530       for (k=a->i[i]; k<a->i[i+1]; k++) {
531         for (l=0; l<bs; l++) {
532           aa[count++] = a->a[bs2*k + l*bs + j];
533         }
534       }
535     }
536   }
537   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
538   ierr = PetscFree(aa);CHKERRQ(ierr);
539 
540   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
541   if (file) {
542     fprintf(file,"-matload_block_size %d\n",a->bs);
543   }
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
549 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
550 {
551   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
552   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
553   PetscViewerFormat format;
554 
555   PetscFunctionBegin;
556   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
557   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
558     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
559   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
560     Mat aij;
561     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
562     ierr = MatView(aij,viewer);CHKERRQ(ierr);
563     ierr = MatDestroy(aij);CHKERRQ(ierr);
564   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
565      PetscFunctionReturn(0);
566   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
567     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
568     for (i=0; i<a->mbs; i++) {
569       for (j=0; j<bs; j++) {
570         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
571         for (k=a->i[i]; k<a->i[i+1]; k++) {
572           for (l=0; l<bs; l++) {
573 #if defined(PETSC_USE_COMPLEX)
574             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
575               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
576                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
577             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
578               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
579                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
580             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
581               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
582             }
583 #else
584             if (a->a[bs2*k + l*bs + j] != 0.0) {
585               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
586             }
587 #endif
588           }
589         }
590         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
591       }
592     }
593     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
594   } else {
595     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
596     for (i=0; i<a->mbs; i++) {
597       for (j=0; j<bs; j++) {
598         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
599         for (k=a->i[i]; k<a->i[i+1]; k++) {
600           for (l=0; l<bs; l++) {
601 #if defined(PETSC_USE_COMPLEX)
602             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
603               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
604                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
605             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
606               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
607                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
608             } else {
609               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
610             }
611 #else
612             ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
613 #endif
614           }
615         }
616         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
617       }
618     }
619     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
620   }
621   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
622   PetscFunctionReturn(0);
623 }
624 
625 #undef __FUNCT__
626 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
627 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
628 {
629   Mat          A = (Mat) Aa;
630   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
631   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
632   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
633   MatScalar    *aa;
634   PetscViewer  viewer;
635 
636   PetscFunctionBegin;
637 
638   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
639   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
640 
641   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
642 
643   /* loop over matrix elements drawing boxes */
644   color = PETSC_DRAW_BLUE;
645   for (i=0,row=0; i<mbs; i++,row+=bs) {
646     for (j=a->i[i]; j<a->i[i+1]; j++) {
647       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
648       x_l = a->j[j]*bs; x_r = x_l + 1.0;
649       aa = a->a + j*bs2;
650       for (k=0; k<bs; k++) {
651         for (l=0; l<bs; l++) {
652           if (PetscRealPart(*aa++) >=  0.) continue;
653           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
654         }
655       }
656     }
657   }
658   color = PETSC_DRAW_CYAN;
659   for (i=0,row=0; i<mbs; i++,row+=bs) {
660     for (j=a->i[i]; j<a->i[i+1]; j++) {
661       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
662       x_l = a->j[j]*bs; x_r = x_l + 1.0;
663       aa = a->a + j*bs2;
664       for (k=0; k<bs; k++) {
665         for (l=0; l<bs; l++) {
666           if (PetscRealPart(*aa++) != 0.) continue;
667           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
668         }
669       }
670     }
671   }
672 
673   color = PETSC_DRAW_RED;
674   for (i=0,row=0; i<mbs; i++,row+=bs) {
675     for (j=a->i[i]; j<a->i[i+1]; j++) {
676       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
677       x_l = a->j[j]*bs; x_r = x_l + 1.0;
678       aa = a->a + j*bs2;
679       for (k=0; k<bs; k++) {
680         for (l=0; l<bs; l++) {
681           if (PetscRealPart(*aa++) <= 0.) continue;
682           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
683         }
684       }
685     }
686   }
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
692 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
693 {
694   int          ierr;
695   PetscReal    xl,yl,xr,yr,w,h;
696   PetscDraw    draw;
697   PetscTruth   isnull;
698 
699   PetscFunctionBegin;
700 
701   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
702   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
703 
704   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
705   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
706   xr += w;    yr += h;  xl = -w;     yl = -h;
707   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
708   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
709   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
710   PetscFunctionReturn(0);
711 }
712 
713 #undef __FUNCT__
714 #define __FUNCT__ "MatView_SeqBAIJ"
715 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
716 {
717   int        ierr;
718   PetscTruth isascii,isbinary,isdraw;
719 
720   PetscFunctionBegin;
721   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
722   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
723   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
724   if (isascii){
725     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
726   } else if (isbinary) {
727     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
728   } else if (isdraw) {
729     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
730   } else {
731     Mat B;
732     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
733     ierr = MatView(B,viewer);CHKERRQ(ierr);
734     ierr = MatDestroy(B);CHKERRQ(ierr);
735   }
736   PetscFunctionReturn(0);
737 }
738 
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatGetValues_SeqBAIJ"
742 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
743 {
744   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
745   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
746   int        *ai = a->i,*ailen = a->ilen;
747   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
748   MatScalar  *ap,*aa = a->a,zero = 0.0;
749 
750   PetscFunctionBegin;
751   for (k=0; k<m; k++) { /* loop over rows */
752     row  = im[k]; brow = row/bs;
753     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
754     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row);
755     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
756     nrow = ailen[brow];
757     for (l=0; l<n; l++) { /* loop over columns */
758       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
759       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]);
760       col  = in[l] ;
761       bcol = col/bs;
762       cidx = col%bs;
763       ridx = row%bs;
764       high = nrow;
765       low  = 0; /* assume unsorted */
766       while (high-low > 5) {
767         t = (low+high)/2;
768         if (rp[t] > bcol) high = t;
769         else             low  = t;
770       }
771       for (i=low; i<high; i++) {
772         if (rp[i] > bcol) break;
773         if (rp[i] == bcol) {
774           *v++ = ap[bs2*i+bs*cidx+ridx];
775           goto finished;
776         }
777       }
778       *v++ = zero;
779       finished:;
780     }
781   }
782   PetscFunctionReturn(0);
783 }
784 
785 #if defined(PETSC_USE_MAT_SINGLE)
786 #undef __FUNCT__
787 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
788 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
789 {
790   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
791   int         ierr,i,N = m*n*b->bs2;
792   MatScalar   *vsingle;
793 
794   PetscFunctionBegin;
795   if (N > b->setvalueslen) {
796     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
797     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
798     b->setvalueslen  = N;
799   }
800   vsingle = b->setvaluescopy;
801   for (i=0; i<N; i++) {
802     vsingle[i] = v[i];
803   }
804   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
805   PetscFunctionReturn(0);
806 }
807 #endif
808 
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
812 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is)
813 {
814   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
815   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
816   int               *imax=a->imax,*ai=a->i,*ailen=a->ilen;
817   int               *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
818   PetscTruth        roworiented=a->roworiented;
819   const MatScalar   *value = v;
820   MatScalar         *ap,*aa = a->a,*bap;
821 
822   PetscFunctionBegin;
823   if (roworiented) {
824     stepval = (n-1)*bs;
825   } else {
826     stepval = (m-1)*bs;
827   }
828   for (k=0; k<m; k++) { /* loop over added rows */
829     row  = im[k];
830     if (row < 0) continue;
831 #if defined(PETSC_USE_BOPT_g)
832     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
833 #endif
834     rp   = aj + ai[row];
835     ap   = aa + bs2*ai[row];
836     rmax = imax[row];
837     nrow = ailen[row];
838     low  = 0;
839     for (l=0; l<n; l++) { /* loop over added columns */
840       if (in[l] < 0) continue;
841 #if defined(PETSC_USE_BOPT_g)
842       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1);
843 #endif
844       col = in[l];
845       if (roworiented) {
846         value = v + k*(stepval+bs)*bs + l*bs;
847       } else {
848         value = v + l*(stepval+bs)*bs + k*bs;
849       }
850       if (!sorted) low = 0; high = nrow;
851       while (high-low > 7) {
852         t = (low+high)/2;
853         if (rp[t] > col) high = t;
854         else             low  = t;
855       }
856       for (i=low; i<high; i++) {
857         if (rp[i] > col) break;
858         if (rp[i] == col) {
859           bap  = ap +  bs2*i;
860           if (roworiented) {
861             if (is == ADD_VALUES) {
862               for (ii=0; ii<bs; ii++,value+=stepval) {
863                 for (jj=ii; jj<bs2; jj+=bs) {
864                   bap[jj] += *value++;
865                 }
866               }
867             } else {
868               for (ii=0; ii<bs; ii++,value+=stepval) {
869                 for (jj=ii; jj<bs2; jj+=bs) {
870                   bap[jj] = *value++;
871                 }
872               }
873             }
874           } else {
875             if (is == ADD_VALUES) {
876               for (ii=0; ii<bs; ii++,value+=stepval) {
877                 for (jj=0; jj<bs; jj++) {
878                   *bap++ += *value++;
879                 }
880               }
881             } else {
882               for (ii=0; ii<bs; ii++,value+=stepval) {
883                 for (jj=0; jj<bs; jj++) {
884                   *bap++  = *value++;
885                 }
886               }
887             }
888           }
889           goto noinsert2;
890         }
891       }
892       if (nonew == 1) goto noinsert2;
893       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
894       if (nrow >= rmax) {
895         /* there is no extra room in row, therefore enlarge */
896         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
897         MatScalar *new_a;
898 
899         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
900 
901         /* malloc new storage space */
902         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
903 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
904         new_j   = (int*)(new_a + bs2*new_nz);
905         new_i   = new_j + new_nz;
906 
907         /* copy over old data into new slots */
908         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
909         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
910         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
911         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
912         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
913         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
914         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
915         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
916         /* free up old matrix storage */
917         ierr = PetscFree(a->a);CHKERRQ(ierr);
918         if (!a->singlemalloc) {
919           ierr = PetscFree(a->i);CHKERRQ(ierr);
920           ierr = PetscFree(a->j);CHKERRQ(ierr);
921         }
922         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
923         a->singlemalloc = PETSC_TRUE;
924 
925         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
926         rmax = imax[row] = imax[row] + CHUNKSIZE;
927         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
928         a->maxnz += bs2*CHUNKSIZE;
929         a->reallocs++;
930         a->nz++;
931       }
932       N = nrow++ - 1;
933       /* shift up all the later entries in this row */
934       for (ii=N; ii>=i; ii--) {
935         rp[ii+1] = rp[ii];
936         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
937       }
938       if (N >= i) {
939         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
940       }
941       rp[i] = col;
942       bap   = ap +  bs2*i;
943       if (roworiented) {
944         for (ii=0; ii<bs; ii++,value+=stepval) {
945           for (jj=ii; jj<bs2; jj+=bs) {
946             bap[jj] = *value++;
947           }
948         }
949       } else {
950         for (ii=0; ii<bs; ii++,value+=stepval) {
951           for (jj=0; jj<bs; jj++) {
952             *bap++  = *value++;
953           }
954         }
955       }
956       noinsert2:;
957       low = i;
958     }
959     ailen[row] = nrow;
960   }
961   PetscFunctionReturn(0);
962 }
963 
964 #undef __FUNCT__
965 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
966 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
967 {
968   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
969   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
970   int        m = A->m,*ip,N,*ailen = a->ilen;
971   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
972   MatScalar  *aa = a->a,*ap;
973 
974   PetscFunctionBegin;
975   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
976 
977   if (m) rmax = ailen[0];
978   for (i=1; i<mbs; i++) {
979     /* move each row back by the amount of empty slots (fshift) before it*/
980     fshift += imax[i-1] - ailen[i-1];
981     rmax   = PetscMax(rmax,ailen[i]);
982     if (fshift) {
983       ip = aj + ai[i]; ap = aa + bs2*ai[i];
984       N = ailen[i];
985       for (j=0; j<N; j++) {
986         ip[j-fshift] = ip[j];
987         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
988       }
989     }
990     ai[i] = ai[i-1] + ailen[i-1];
991   }
992   if (mbs) {
993     fshift += imax[mbs-1] - ailen[mbs-1];
994     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
995   }
996   /* reset ilen and imax for each row */
997   for (i=0; i<mbs; i++) {
998     ailen[i] = imax[i] = ai[i+1] - ai[i];
999   }
1000   a->nz = ai[mbs];
1001 
1002   /* diagonals may have moved, so kill the diagonal pointers */
1003   if (fshift && a->diag) {
1004     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1005     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
1006     a->diag = 0;
1007   }
1008   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1009   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
1010   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
1011   a->reallocs          = 0;
1012   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1013 
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 
1018 
1019 /*
1020    This function returns an array of flags which indicate the locations of contiguous
1021    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1022    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1023    Assume: sizes should be long enough to hold all the values.
1024 */
1025 #undef __FUNCT__
1026 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1027 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1028 {
1029   int        i,j,k,row;
1030   PetscTruth flg;
1031 
1032   PetscFunctionBegin;
1033   for (i=0,j=0; i<n; j++) {
1034     row = idx[i];
1035     if (row%bs!=0) { /* Not the begining of a block */
1036       sizes[j] = 1;
1037       i++;
1038     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1039       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1040       i++;
1041     } else { /* Begining of the block, so check if the complete block exists */
1042       flg = PETSC_TRUE;
1043       for (k=1; k<bs; k++) {
1044         if (row+k != idx[i+k]) { /* break in the block */
1045           flg = PETSC_FALSE;
1046           break;
1047         }
1048       }
1049       if (flg == PETSC_TRUE) { /* No break in the bs */
1050         sizes[j] = bs;
1051         i+= bs;
1052       } else {
1053         sizes[j] = 1;
1054         i++;
1055       }
1056     }
1057   }
1058   *bs_max = j;
1059   PetscFunctionReturn(0);
1060 }
1061 
1062 #undef __FUNCT__
1063 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1064 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1065 {
1066   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1067   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
1068   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1069   PetscScalar zero = 0.0;
1070   MatScalar   *aa;
1071 
1072   PetscFunctionBegin;
1073   /* Make a copy of the IS and  sort it */
1074   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1075   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1076 
1077   /* allocate memory for rows,sizes */
1078   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
1079   sizes = rows + is_n;
1080 
1081   /* copy IS values to rows, and sort them */
1082   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1083   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1084   if (baij->keepzeroedrows) {
1085     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1086     bs_max = is_n;
1087   } else {
1088     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1089   }
1090   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1091 
1092   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1093     row   = rows[j];
1094     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1095     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1096     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1097     if (sizes[i] == bs && !baij->keepzeroedrows) {
1098       if (diag) {
1099         if (baij->ilen[row/bs] > 0) {
1100           baij->ilen[row/bs]       = 1;
1101           baij->j[baij->i[row/bs]] = row/bs;
1102           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1103         }
1104         /* Now insert all the diagonal values for this bs */
1105         for (k=0; k<bs; k++) {
1106           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1107         }
1108       } else { /* (!diag) */
1109         baij->ilen[row/bs] = 0;
1110       } /* end (!diag) */
1111     } else { /* (sizes[i] != bs) */
1112 #if defined (PETSC_USE_DEBUG)
1113       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1114 #endif
1115       for (k=0; k<count; k++) {
1116         aa[0] =  zero;
1117         aa    += bs;
1118       }
1119       if (diag) {
1120         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1121       }
1122     }
1123   }
1124 
1125   ierr = PetscFree(rows);CHKERRQ(ierr);
1126   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1132 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
1133 {
1134   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1135   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1136   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1137   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1138   int         ridx,cidx,bs2=a->bs2,ierr;
1139   PetscTruth  roworiented=a->roworiented;
1140   MatScalar   *ap,value,*aa=a->a,*bap;
1141 
1142   PetscFunctionBegin;
1143   for (k=0; k<m; k++) { /* loop over added rows */
1144     row  = im[k]; brow = row/bs;
1145     if (row < 0) continue;
1146 #if defined(PETSC_USE_BOPT_g)
1147     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
1148 #endif
1149     rp   = aj + ai[brow];
1150     ap   = aa + bs2*ai[brow];
1151     rmax = imax[brow];
1152     nrow = ailen[brow];
1153     low  = 0;
1154     for (l=0; l<n; l++) { /* loop over added columns */
1155       if (in[l] < 0) continue;
1156 #if defined(PETSC_USE_BOPT_g)
1157       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
1158 #endif
1159       col = in[l]; bcol = col/bs;
1160       ridx = row % bs; cidx = col % bs;
1161       if (roworiented) {
1162         value = v[l + k*n];
1163       } else {
1164         value = v[k + l*m];
1165       }
1166       if (!sorted) low = 0; high = nrow;
1167       while (high-low > 7) {
1168         t = (low+high)/2;
1169         if (rp[t] > bcol) high = t;
1170         else              low  = t;
1171       }
1172       for (i=low; i<high; i++) {
1173         if (rp[i] > bcol) break;
1174         if (rp[i] == bcol) {
1175           bap  = ap +  bs2*i + bs*cidx + ridx;
1176           if (is == ADD_VALUES) *bap += value;
1177           else                  *bap  = value;
1178           goto noinsert1;
1179         }
1180       }
1181       if (nonew == 1) goto noinsert1;
1182       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1183       if (nrow >= rmax) {
1184         /* there is no extra room in row, therefore enlarge */
1185         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1186         MatScalar *new_a;
1187 
1188         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1189 
1190         /* Malloc new storage space */
1191         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1192 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1193         new_j   = (int*)(new_a + bs2*new_nz);
1194         new_i   = new_j + new_nz;
1195 
1196         /* copy over old data into new slots */
1197         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1198         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1199         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1200         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1201         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1202         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1203         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1204         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1205         /* free up old matrix storage */
1206         ierr = PetscFree(a->a);CHKERRQ(ierr);
1207         if (!a->singlemalloc) {
1208           ierr = PetscFree(a->i);CHKERRQ(ierr);
1209           ierr = PetscFree(a->j);CHKERRQ(ierr);
1210         }
1211         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1212         a->singlemalloc = PETSC_TRUE;
1213 
1214         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1215         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1216         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1217         a->maxnz += bs2*CHUNKSIZE;
1218         a->reallocs++;
1219         a->nz++;
1220       }
1221       N = nrow++ - 1;
1222       /* shift up all the later entries in this row */
1223       for (ii=N; ii>=i; ii--) {
1224         rp[ii+1] = rp[ii];
1225         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1226       }
1227       if (N>=i) {
1228         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1229       }
1230       rp[i]                      = bcol;
1231       ap[bs2*i + bs*cidx + ridx] = value;
1232       noinsert1:;
1233       low = i;
1234     }
1235     ailen[brow] = nrow;
1236   }
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1243 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1244 {
1245   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1246   Mat         outA;
1247   int         ierr;
1248   PetscTruth  row_identity,col_identity;
1249 
1250   PetscFunctionBegin;
1251   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1252   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1253   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1254   if (!row_identity || !col_identity) {
1255     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1256   }
1257 
1258   outA          = inA;
1259   inA->factor   = FACTOR_LU;
1260 
1261   if (!a->diag) {
1262     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1263   }
1264 
1265   a->row        = row;
1266   a->col        = col;
1267   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1268   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1269 
1270   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1271   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1272   PetscLogObjectParent(inA,a->icol);
1273 
1274   /*
1275       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1276       for ILU(0) factorization with natural ordering
1277   */
1278   if (a->bs < 8) {
1279     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1280   } else {
1281     if (!a->solve_work) {
1282       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1283       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1284     }
1285   }
1286 
1287   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1288 
1289   PetscFunctionReturn(0);
1290 }
1291 #undef __FUNCT__
1292 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1293 int MatPrintHelp_SeqBAIJ(Mat A)
1294 {
1295   static PetscTruth called = PETSC_FALSE;
1296   MPI_Comm          comm = A->comm;
1297   int               ierr;
1298 
1299   PetscFunctionBegin;
1300   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1301   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1302   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 EXTERN_C_BEGIN
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1309 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1310 {
1311   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1312   int         i,nz,nbs;
1313 
1314   PetscFunctionBegin;
1315   nz  = baij->maxnz/baij->bs2;
1316   nbs = baij->nbs;
1317   for (i=0; i<nz; i++) {
1318     baij->j[i] = indices[i];
1319   }
1320   baij->nz = nz;
1321   for (i=0; i<nbs; i++) {
1322     baij->ilen[i] = baij->imax[i];
1323   }
1324 
1325   PetscFunctionReturn(0);
1326 }
1327 EXTERN_C_END
1328 
1329 #undef __FUNCT__
1330 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1331 /*@
1332     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1333        in the matrix.
1334 
1335   Input Parameters:
1336 +  mat - the SeqBAIJ matrix
1337 -  indices - the column indices
1338 
1339   Level: advanced
1340 
1341   Notes:
1342     This can be called if you have precomputed the nonzero structure of the
1343   matrix and want to provide it to the matrix object to improve the performance
1344   of the MatSetValues() operation.
1345 
1346     You MUST have set the correct numbers of nonzeros per row in the call to
1347   MatCreateSeqBAIJ().
1348 
1349     MUST be called before any calls to MatSetValues();
1350 
1351 @*/
1352 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1353 {
1354   int ierr,(*f)(Mat,int *);
1355 
1356   PetscFunctionBegin;
1357   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1358   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1359   if (f) {
1360     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1361   } else {
1362     SETERRQ(1,"Wrong type of matrix to set column indices");
1363   }
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1369 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1370 {
1371   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1372   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1373   PetscReal    atmp;
1374   PetscScalar  *x,zero = 0.0;
1375   MatScalar    *aa;
1376   int          ncols,brow,krow,kcol;
1377 
1378   PetscFunctionBegin;
1379   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1380   bs   = a->bs;
1381   aa   = a->a;
1382   ai   = a->i;
1383   aj   = a->j;
1384   mbs = a->mbs;
1385 
1386   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1387   ierr = VecGetArrayFast(v,&x);CHKERRQ(ierr);
1388   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1389   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1390   for (i=0; i<mbs; i++) {
1391     ncols = ai[1] - ai[0]; ai++;
1392     brow  = bs*i;
1393     for (j=0; j<ncols; j++){
1394       /* bcol = bs*(*aj); */
1395       for (kcol=0; kcol<bs; kcol++){
1396         for (krow=0; krow<bs; krow++){
1397           atmp = PetscAbsScalar(*aa); aa++;
1398           row = brow + krow;    /* row index */
1399           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1400           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1401         }
1402       }
1403       aj++;
1404     }
1405   }
1406   ierr = VecRestoreArrayFast(v,&x);CHKERRQ(ierr);
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1412 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1413 {
1414   int        ierr;
1415 
1416   PetscFunctionBegin;
1417   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1423 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1424 {
1425   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1426   PetscFunctionBegin;
1427   *array = a->a;
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 #undef __FUNCT__
1432 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1433 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1434 {
1435   PetscFunctionBegin;
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #include "petscblaslapack.h"
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1442 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1443 {
1444   Mat_SeqBAIJ  *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1445   int          ierr,one=1,i,bs=y->bs,j,bs2;
1446 
1447   PetscFunctionBegin;
1448   if (str == SAME_NONZERO_PATTERN) {
1449     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1450   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1451     if (y->xtoy && y->XtoY != X) {
1452       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1453       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1454     }
1455     if (!y->xtoy) { /* get xtoy */
1456       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1457       y->XtoY = X;
1458     }
1459     bs2 = bs*bs;
1460     for (i=0; i<x->nz; i++) {
1461       j = 0;
1462       while (j < bs2){
1463         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1464         j++;
1465       }
1466     }
1467     PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1468   } else {
1469     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1470   }
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 /* -------------------------------------------------------------------*/
1475 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1476        MatGetRow_SeqBAIJ,
1477        MatRestoreRow_SeqBAIJ,
1478        MatMult_SeqBAIJ_N,
1479 /* 4*/ MatMultAdd_SeqBAIJ_N,
1480        MatMultTranspose_SeqBAIJ,
1481        MatMultTransposeAdd_SeqBAIJ,
1482        MatSolve_SeqBAIJ_N,
1483        0,
1484        0,
1485 /*10*/ 0,
1486        MatLUFactor_SeqBAIJ,
1487        0,
1488        0,
1489        MatTranspose_SeqBAIJ,
1490 /*15*/ MatGetInfo_SeqBAIJ,
1491        MatEqual_SeqBAIJ,
1492        MatGetDiagonal_SeqBAIJ,
1493        MatDiagonalScale_SeqBAIJ,
1494        MatNorm_SeqBAIJ,
1495 /*20*/ 0,
1496        MatAssemblyEnd_SeqBAIJ,
1497        0,
1498        MatSetOption_SeqBAIJ,
1499        MatZeroEntries_SeqBAIJ,
1500 /*25*/ MatZeroRows_SeqBAIJ,
1501        MatLUFactorSymbolic_SeqBAIJ,
1502        MatLUFactorNumeric_SeqBAIJ_N,
1503        0,
1504        0,
1505 /*30*/ MatSetUpPreallocation_SeqBAIJ,
1506        MatILUFactorSymbolic_SeqBAIJ,
1507        0,
1508        MatGetArray_SeqBAIJ,
1509        MatRestoreArray_SeqBAIJ,
1510 /*35*/ MatDuplicate_SeqBAIJ,
1511        0,
1512        0,
1513        MatILUFactor_SeqBAIJ,
1514        0,
1515 /*40*/ MatAXPY_SeqBAIJ,
1516        MatGetSubMatrices_SeqBAIJ,
1517        MatIncreaseOverlap_SeqBAIJ,
1518        MatGetValues_SeqBAIJ,
1519        0,
1520 /*45*/ MatPrintHelp_SeqBAIJ,
1521        MatScale_SeqBAIJ,
1522        0,
1523        0,
1524        0,
1525 /*50*/ MatGetBlockSize_SeqBAIJ,
1526        MatGetRowIJ_SeqBAIJ,
1527        MatRestoreRowIJ_SeqBAIJ,
1528        0,
1529        0,
1530 /*55*/ 0,
1531        0,
1532        0,
1533        0,
1534        MatSetValuesBlocked_SeqBAIJ,
1535 /*60*/ MatGetSubMatrix_SeqBAIJ,
1536        MatDestroy_SeqBAIJ,
1537        MatView_SeqBAIJ,
1538        MatGetPetscMaps_Petsc,
1539        0,
1540 /*65*/ 0,
1541        0,
1542        0,
1543        0,
1544        0,
1545 /*70*/ MatGetRowMax_SeqBAIJ,
1546        MatConvert_Basic,
1547        0,
1548        0,
1549        0,
1550 /*75*/ 0,
1551        0,
1552        0,
1553        0,
1554        0,
1555 /*80*/ 0,
1556        0,
1557        0,
1558        0,
1559 /*85*/ MatLoad_SeqBAIJ
1560 };
1561 
1562 EXTERN_C_BEGIN
1563 #undef __FUNCT__
1564 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1565 int MatStoreValues_SeqBAIJ(Mat mat)
1566 {
1567   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1568   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1569   int         ierr;
1570 
1571   PetscFunctionBegin;
1572   if (aij->nonew != 1) {
1573     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1574   }
1575 
1576   /* allocate space for values if not already there */
1577   if (!aij->saved_values) {
1578     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1579   }
1580 
1581   /* copy values over */
1582   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1583   PetscFunctionReturn(0);
1584 }
1585 EXTERN_C_END
1586 
1587 EXTERN_C_BEGIN
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1590 int MatRetrieveValues_SeqBAIJ(Mat mat)
1591 {
1592   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1593   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1594 
1595   PetscFunctionBegin;
1596   if (aij->nonew != 1) {
1597     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1598   }
1599   if (!aij->saved_values) {
1600     SETERRQ(1,"Must call MatStoreValues(A);first");
1601   }
1602 
1603   /* copy values over */
1604   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1605   PetscFunctionReturn(0);
1606 }
1607 EXTERN_C_END
1608 
1609 EXTERN_C_BEGIN
1610 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*);
1611 extern int MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*);
1612 EXTERN_C_END
1613 
1614 EXTERN_C_BEGIN
1615 #undef __FUNCT__
1616 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
1617 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz)
1618 {
1619   Mat_SeqBAIJ *b;
1620   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1621   PetscTruth  flg;
1622 
1623   PetscFunctionBegin;
1624 
1625   B->preallocated = PETSC_TRUE;
1626   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1627   if (nnz && newbs != bs) {
1628     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1629   }
1630   bs   = newbs;
1631 
1632   mbs  = B->m/bs;
1633   nbs  = B->n/bs;
1634   bs2  = bs*bs;
1635 
1636   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1637     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1638   }
1639 
1640   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1641   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1642   if (nnz) {
1643     for (i=0; i<mbs; i++) {
1644       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1645       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1646     }
1647   }
1648 
1649   b       = (Mat_SeqBAIJ*)B->data;
1650   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1651   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1652   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1653   if (!flg) {
1654     switch (bs) {
1655     case 1:
1656       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1657       B->ops->mult            = MatMult_SeqBAIJ_1;
1658       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1659       break;
1660     case 2:
1661       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1662       B->ops->mult            = MatMult_SeqBAIJ_2;
1663       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1664       break;
1665     case 3:
1666       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1667       B->ops->mult            = MatMult_SeqBAIJ_3;
1668       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1669       break;
1670     case 4:
1671       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1672       B->ops->mult            = MatMult_SeqBAIJ_4;
1673       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1674       break;
1675     case 5:
1676       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1677       B->ops->mult            = MatMult_SeqBAIJ_5;
1678       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1679       break;
1680     case 6:
1681       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1682       B->ops->mult            = MatMult_SeqBAIJ_6;
1683       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1684       break;
1685     case 7:
1686       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1687       B->ops->mult            = MatMult_SeqBAIJ_7;
1688       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1689       break;
1690     default:
1691       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1692       B->ops->mult            = MatMult_SeqBAIJ_N;
1693       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1694       break;
1695     }
1696   }
1697   b->bs      = bs;
1698   b->mbs     = mbs;
1699   b->nbs     = nbs;
1700   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1701   if (!nnz) {
1702     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1703     else if (nz <= 0)        nz = 1;
1704     for (i=0; i<mbs; i++) b->imax[i] = nz;
1705     nz = nz*mbs;
1706   } else {
1707     nz = 0;
1708     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1709   }
1710 
1711   /* allocate the matrix space */
1712   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1713   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1714   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1715   b->j  = (int*)(b->a + nz*bs2);
1716   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1717   b->i  = b->j + nz;
1718   b->singlemalloc = PETSC_TRUE;
1719 
1720   b->i[0] = 0;
1721   for (i=1; i<mbs+1; i++) {
1722     b->i[i] = b->i[i-1] + b->imax[i-1];
1723   }
1724 
1725   /* b->ilen will count nonzeros in each block row so far. */
1726   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1727   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1728   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1729 
1730   b->bs               = bs;
1731   b->bs2              = bs2;
1732   b->mbs              = mbs;
1733   b->nz               = 0;
1734   b->maxnz            = nz*bs2;
1735   B->info.nz_unneeded = (PetscReal)b->maxnz;
1736   PetscFunctionReturn(0);
1737 }
1738 EXTERN_C_END
1739 
1740 /*MC
1741    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
1742    block sparse compressed row format.
1743 
1744    Options Database Keys:
1745 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
1746 
1747   Level: beginner
1748 
1749 .seealso: MatCreateSeqBAIJ
1750 M*/
1751 
1752 EXTERN_C_BEGIN
1753 #undef __FUNCT__
1754 #define __FUNCT__ "MatCreate_SeqBAIJ"
1755 int MatCreate_SeqBAIJ(Mat B)
1756 {
1757   int         ierr,size;
1758   Mat_SeqBAIJ *b;
1759 
1760   PetscFunctionBegin;
1761   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1762   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1763 
1764   B->m = B->M = PetscMax(B->m,B->M);
1765   B->n = B->N = PetscMax(B->n,B->N);
1766   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1767   B->data = (void*)b;
1768   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1769   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1770   B->factor           = 0;
1771   B->lupivotthreshold = 1.0;
1772   B->mapping          = 0;
1773   b->row              = 0;
1774   b->col              = 0;
1775   b->icol             = 0;
1776   b->reallocs         = 0;
1777   b->saved_values     = 0;
1778 #if defined(PETSC_USE_MAT_SINGLE)
1779   b->setvalueslen     = 0;
1780   b->setvaluescopy    = PETSC_NULL;
1781 #endif
1782 
1783   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1784   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1785 
1786   b->sorted           = PETSC_FALSE;
1787   b->roworiented      = PETSC_TRUE;
1788   b->nonew            = 0;
1789   b->diag             = 0;
1790   b->solve_work       = 0;
1791   b->mult_work        = 0;
1792   B->spptr            = 0;
1793   B->info.nz_unneeded = (PetscReal)b->maxnz;
1794   b->keepzeroedrows   = PETSC_FALSE;
1795   b->xtoy              = 0;
1796   b->XtoY              = 0;
1797 
1798   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1799                                      "MatStoreValues_SeqBAIJ",
1800                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1801   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1802                                      "MatRetrieveValues_SeqBAIJ",
1803                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1804   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1805                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1806                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1807   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
1808                                      "MatConvert_SeqBAIJ_SeqAIJ",
1809                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1810 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
1811                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
1812                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
1813   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
1814                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
1815                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
1816   PetscFunctionReturn(0);
1817 }
1818 EXTERN_C_END
1819 
1820 #undef __FUNCT__
1821 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1822 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1823 {
1824   Mat         C;
1825   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1826   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1827 
1828   PetscFunctionBegin;
1829   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1830 
1831   *B = 0;
1832   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1833   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1834   c    = (Mat_SeqBAIJ*)C->data;
1835 
1836   C->M   = A->M;
1837   C->N   = A->N;
1838   c->bs  = a->bs;
1839   c->bs2 = a->bs2;
1840   c->mbs = a->mbs;
1841   c->nbs = a->nbs;
1842   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1843 
1844   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1845   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1846   for (i=0; i<mbs; i++) {
1847     c->imax[i] = a->imax[i];
1848     c->ilen[i] = a->ilen[i];
1849   }
1850 
1851   /* allocate the matrix space */
1852   c->singlemalloc = PETSC_TRUE;
1853   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1854   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1855   c->j = (int*)(c->a + nz*bs2);
1856   c->i = c->j + nz;
1857   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1858   if (mbs > 0) {
1859     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1860     if (cpvalues == MAT_COPY_VALUES) {
1861       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1862     } else {
1863       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1864     }
1865   }
1866 
1867   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1868   c->sorted      = a->sorted;
1869   c->roworiented = a->roworiented;
1870   c->nonew       = a->nonew;
1871 
1872   if (a->diag) {
1873     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1874     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1875     for (i=0; i<mbs; i++) {
1876       c->diag[i] = a->diag[i];
1877     }
1878   } else c->diag        = 0;
1879   c->nz                 = a->nz;
1880   c->maxnz              = a->maxnz;
1881   c->solve_work         = 0;
1882   C->spptr              = 0;     /* Dangerous -I'm throwing away a->spptr */
1883   c->mult_work          = 0;
1884   C->preallocated       = PETSC_TRUE;
1885   C->assembled          = PETSC_TRUE;
1886   *B = C;
1887   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1888   PetscFunctionReturn(0);
1889 }
1890 
1891 #undef __FUNCT__
1892 #define __FUNCT__ "MatLoad_SeqBAIJ"
1893 int MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
1894 {
1895   Mat_SeqBAIJ  *a;
1896   Mat          B;
1897   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1898   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1899   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1900   int          *masked,nmask,tmp,bs2,ishift;
1901   PetscScalar  *aa;
1902   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1903 
1904   PetscFunctionBegin;
1905   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1906   bs2  = bs*bs;
1907 
1908   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1909   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1910   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1911   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1912   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1913   M = header[1]; N = header[2]; nz = header[3];
1914 
1915   if (header[3] < 0) {
1916     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1917   }
1918 
1919   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1920 
1921   /*
1922      This code adds extra rows to make sure the number of rows is
1923     divisible by the blocksize
1924   */
1925   mbs        = M/bs;
1926   extra_rows = bs - M + bs*(mbs);
1927   if (extra_rows == bs) extra_rows = 0;
1928   else                  mbs++;
1929   if (extra_rows) {
1930     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1931   }
1932 
1933   /* read in row lengths */
1934   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1935   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1936   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1937 
1938   /* read in column indices */
1939   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1940   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1941   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1942 
1943   /* loop over row lengths determining block row lengths */
1944   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1945   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1946   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1947   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1948   masked   = mask + mbs;
1949   rowcount = 0; nzcount = 0;
1950   for (i=0; i<mbs; i++) {
1951     nmask = 0;
1952     for (j=0; j<bs; j++) {
1953       kmax = rowlengths[rowcount];
1954       for (k=0; k<kmax; k++) {
1955         tmp = jj[nzcount++]/bs;
1956         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1957       }
1958       rowcount++;
1959     }
1960     browlengths[i] += nmask;
1961     /* zero out the mask elements we set */
1962     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1963   }
1964 
1965   /* create our matrix */
1966   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
1967   ierr = MatSetType(B,type);CHKERRQ(ierr);
1968   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
1969   a = (Mat_SeqBAIJ*)B->data;
1970 
1971   /* set matrix "i" values */
1972   a->i[0] = 0;
1973   for (i=1; i<= mbs; i++) {
1974     a->i[i]      = a->i[i-1] + browlengths[i-1];
1975     a->ilen[i-1] = browlengths[i-1];
1976   }
1977   a->nz         = 0;
1978   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1979 
1980   /* read in nonzero values */
1981   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1982   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1983   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1984 
1985   /* set "a" and "j" values into matrix */
1986   nzcount = 0; jcount = 0;
1987   for (i=0; i<mbs; i++) {
1988     nzcountb = nzcount;
1989     nmask    = 0;
1990     for (j=0; j<bs; j++) {
1991       kmax = rowlengths[i*bs+j];
1992       for (k=0; k<kmax; k++) {
1993         tmp = jj[nzcount++]/bs;
1994 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1995       }
1996     }
1997     /* sort the masked values */
1998     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1999 
2000     /* set "j" values into matrix */
2001     maskcount = 1;
2002     for (j=0; j<nmask; j++) {
2003       a->j[jcount++]  = masked[j];
2004       mask[masked[j]] = maskcount++;
2005     }
2006     /* set "a" values into matrix */
2007     ishift = bs2*a->i[i];
2008     for (j=0; j<bs; j++) {
2009       kmax = rowlengths[i*bs+j];
2010       for (k=0; k<kmax; k++) {
2011         tmp       = jj[nzcountb]/bs ;
2012         block     = mask[tmp] - 1;
2013         point     = jj[nzcountb] - bs*tmp;
2014         idx       = ishift + bs2*block + j + bs*point;
2015         a->a[idx] = (MatScalar)aa[nzcountb++];
2016       }
2017     }
2018     /* zero out the mask elements we set */
2019     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2020   }
2021   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2022 
2023   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2024   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2025   ierr = PetscFree(aa);CHKERRQ(ierr);
2026   ierr = PetscFree(jj);CHKERRQ(ierr);
2027   ierr = PetscFree(mask);CHKERRQ(ierr);
2028 
2029   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2030   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2031   ierr = MatView_Private(B);CHKERRQ(ierr);
2032 
2033   *A = B;
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 #undef __FUNCT__
2038 #define __FUNCT__ "MatCreateSeqBAIJ"
2039 /*@C
2040    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2041    compressed row) format.  For good matrix assembly performance the
2042    user should preallocate the matrix storage by setting the parameter nz
2043    (or the array nnz).  By setting these parameters accurately, performance
2044    during matrix assembly can be increased by more than a factor of 50.
2045 
2046    Collective on MPI_Comm
2047 
2048    Input Parameters:
2049 +  comm - MPI communicator, set to PETSC_COMM_SELF
2050 .  bs - size of block
2051 .  m - number of rows
2052 .  n - number of columns
2053 .  nz - number of nonzero blocks  per block row (same for all rows)
2054 -  nnz - array containing the number of nonzero blocks in the various block rows
2055          (possibly different for each block row) or PETSC_NULL
2056 
2057    Output Parameter:
2058 .  A - the matrix
2059 
2060    Options Database Keys:
2061 .   -mat_no_unroll - uses code that does not unroll the loops in the
2062                      block calculations (much slower)
2063 .    -mat_block_size - size of the blocks to use
2064 
2065    Level: intermediate
2066 
2067    Notes:
2068    A nonzero block is any block that as 1 or more nonzeros in it
2069 
2070    The block AIJ format is fully compatible with standard Fortran 77
2071    storage.  That is, the stored row and column indices can begin at
2072    either one (as in Fortran) or zero.  See the users' manual for details.
2073 
2074    Specify the preallocated storage with either nz or nnz (not both).
2075    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2076    allocation.  For additional details, see the users manual chapter on
2077    matrices.
2078 
2079 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2080 @*/
2081 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
2082 {
2083   int ierr;
2084 
2085   PetscFunctionBegin;
2086   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2087   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2088   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2089   PetscFunctionReturn(0);
2090 }
2091 
2092 #undef __FUNCT__
2093 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2094 /*@C
2095    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2096    per row in the matrix. For good matrix assembly performance the
2097    user should preallocate the matrix storage by setting the parameter nz
2098    (or the array nnz).  By setting these parameters accurately, performance
2099    during matrix assembly can be increased by more than a factor of 50.
2100 
2101    Collective on MPI_Comm
2102 
2103    Input Parameters:
2104 +  A - the matrix
2105 .  bs - size of block
2106 .  nz - number of block nonzeros per block row (same for all rows)
2107 -  nnz - array containing the number of block nonzeros in the various block rows
2108          (possibly different for each block row) or PETSC_NULL
2109 
2110    Options Database Keys:
2111 .   -mat_no_unroll - uses code that does not unroll the loops in the
2112                      block calculations (much slower)
2113 .    -mat_block_size - size of the blocks to use
2114 
2115    Level: intermediate
2116 
2117    Notes:
2118    The block AIJ format is fully compatible with standard Fortran 77
2119    storage.  That is, the stored row and column indices can begin at
2120    either one (as in Fortran) or zero.  See the users' manual for details.
2121 
2122    Specify the preallocated storage with either nz or nnz (not both).
2123    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2124    allocation.  For additional details, see the users manual chapter on
2125    matrices.
2126 
2127 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2128 @*/
2129 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[])
2130 {
2131   int ierr,(*f)(Mat,int,int,const int[]);
2132 
2133   PetscFunctionBegin;
2134   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2135   if (f) {
2136     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2137   }
2138   PetscFunctionReturn(0);
2139 }
2140