xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 456192e25527e6dfd94efe19f31bdabe85ed0893)
1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "petscsys.h"                     /*I "petscmat.h" I*/
11 
12 /*
13     Special version for Fun3d sequential benchmark
14 */
15 #if defined(PETSC_HAVE_FORTRAN_CAPS)
16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
18 #define matsetvaluesblocked4_ matsetvaluesblocked4
19 #endif
20 
21 #undef __FUNCT__
22 #define __FUNCT__ "matsetvaluesblocked4_"
23 void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
24 {
25   Mat         A = *AA;
26   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
27   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
28   int         *ai=a->i,*ailen=a->ilen;
29   int         *aj=a->j,stepval;
30   PetscScalar *value = v;
31   MatScalar   *ap,*aa = a->a,*bap;
32 
33   PetscFunctionBegin;
34   stepval = (n-1)*4;
35   for (k=0; k<m; k++) { /* loop over added rows */
36     row  = im[k];
37     rp   = aj + ai[row];
38     ap   = aa + 16*ai[row];
39     nrow = ailen[row];
40     low  = 0;
41     for (l=0; l<n; l++) { /* loop over added columns */
42       col = in[l];
43       value = v + k*(stepval+4)*4 + l*4;
44       low = 0; high = nrow;
45       while (high-low > 7) {
46         t = (low+high)/2;
47         if (rp[t] > col) high = t;
48         else             low  = t;
49       }
50       for (i=low; i<high; i++) {
51         if (rp[i] > col) break;
52         if (rp[i] == col) {
53           bap  = ap +  16*i;
54           for (ii=0; ii<4; ii++,value+=stepval) {
55             for (jj=ii; jj<16; jj+=4) {
56               bap[jj] += *value++;
57             }
58           }
59           goto noinsert2;
60         }
61       }
62       N = nrow++ - 1;
63       /* shift up all the later entries in this row */
64       for (ii=N; ii>=i; ii--) {
65         rp[ii+1] = rp[ii];
66         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
67       }
68       if (N >= i) {
69         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
70       }
71       rp[i] = col;
72       bap   = ap +  16*i;
73       for (ii=0; ii<4; ii++,value+=stepval) {
74         for (jj=ii; jj<16; jj+=4) {
75           bap[jj] = *value++;
76         }
77       }
78       noinsert2:;
79       low = i;
80     }
81     ailen[row] = nrow;
82   }
83 }
84 
85 #if defined(PETSC_HAVE_FORTRAN_CAPS)
86 #define matsetvalues4_ MATSETVALUES4
87 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
88 #define matsetvalues4_ matsetvalues4
89 #endif
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "MatSetValues4_"
93 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
94 {
95   Mat         A = *AA;
96   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
97   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
98   int         *ai=a->i,*ailen=a->ilen;
99   int         *aj=a->j,brow,bcol;
100   int         ridx,cidx;
101   MatScalar   *ap,value,*aa=a->a,*bap;
102 
103   PetscFunctionBegin;
104   for (k=0; k<m; k++) { /* loop over added rows */
105     row  = im[k]; brow = row/4;
106     rp   = aj + ai[brow];
107     ap   = aa + 16*ai[brow];
108     nrow = ailen[brow];
109     low  = 0;
110     for (l=0; l<n; l++) { /* loop over added columns */
111       col = in[l]; bcol = col/4;
112       ridx = row % 4; cidx = col % 4;
113       value = v[l + k*n];
114       low = 0; high = nrow;
115       while (high-low > 7) {
116         t = (low+high)/2;
117         if (rp[t] > bcol) high = t;
118         else              low  = t;
119       }
120       for (i=low; i<high; i++) {
121         if (rp[i] > bcol) break;
122         if (rp[i] == bcol) {
123           bap  = ap +  16*i + 4*cidx + ridx;
124           *bap += value;
125           goto noinsert1;
126         }
127       }
128       N = nrow++ - 1;
129       /* shift up all the later entries in this row */
130       for (ii=N; ii>=i; ii--) {
131         rp[ii+1] = rp[ii];
132         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
133       }
134       if (N>=i) {
135         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
136       }
137       rp[i]                    = bcol;
138       ap[16*i + 4*cidx + ridx] = value;
139       noinsert1:;
140       low = i;
141     }
142     ailen[brow] = nrow;
143   }
144 }
145 
146 /*  UGLY, ugly, ugly
147    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
148    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
149    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
150    converts the entries into single precision and then calls ..._MatScalar() to put them
151    into the single precision data structures.
152 */
153 #if defined(PETSC_USE_MAT_SINGLE)
154 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
155 #else
156 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
157 #endif
158 #if defined(PETSC_HAVE_DSCPACK)
159 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat);
160 #endif
161 
162 #define CHUNKSIZE  10
163 
164 /*
165      Checks for missing diagonals
166 */
167 #undef __FUNCT__
168 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
169 int MatMissingDiagonal_SeqBAIJ(Mat A)
170 {
171   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
172   int         *diag,*jj = a->j,i,ierr;
173 
174   PetscFunctionBegin;
175   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
176   diag = a->diag;
177   for (i=0; i<a->mbs; i++) {
178     if (jj[diag[i]] != i) {
179       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
180     }
181   }
182   PetscFunctionReturn(0);
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
187 int MatMarkDiagonal_SeqBAIJ(Mat A)
188 {
189   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
190   int         i,j,*diag,m = a->mbs,ierr;
191 
192   PetscFunctionBegin;
193   if (a->diag) PetscFunctionReturn(0);
194 
195   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
196   PetscLogObjectMemory(A,(m+1)*sizeof(int));
197   for (i=0; i<m; i++) {
198     diag[i] = a->i[i+1];
199     for (j=a->i[i]; j<a->i[i+1]; j++) {
200       if (a->j[j] == i) {
201         diag[i] = j;
202         break;
203       }
204     }
205   }
206   a->diag = diag;
207   PetscFunctionReturn(0);
208 }
209 
210 
211 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
212 
213 #undef __FUNCT__
214 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
215 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
216 {
217   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
218   int         ierr,n = a->mbs,i;
219 
220   PetscFunctionBegin;
221   *nn = n;
222   if (!ia) PetscFunctionReturn(0);
223   if (symmetric) {
224     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
225   } else if (oshift == 1) {
226     /* temporarily add 1 to i and j indices */
227     int nz = a->i[n];
228     for (i=0; i<nz; i++) a->j[i]++;
229     for (i=0; i<n+1; i++) a->i[i]++;
230     *ia = a->i; *ja = a->j;
231   } else {
232     *ia = a->i; *ja = a->j;
233   }
234 
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
240 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
241 {
242   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
243   int         i,n = a->mbs,ierr;
244 
245   PetscFunctionBegin;
246   if (!ia) PetscFunctionReturn(0);
247   if (symmetric) {
248     ierr = PetscFree(*ia);CHKERRQ(ierr);
249     ierr = PetscFree(*ja);CHKERRQ(ierr);
250   } else if (oshift == 1) {
251     int nz = a->i[n]-1;
252     for (i=0; i<nz; i++) a->j[i]--;
253     for (i=0; i<n+1; i++) a->i[i]--;
254   }
255   PetscFunctionReturn(0);
256 }
257 
258 #undef __FUNCT__
259 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
260 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
261 {
262   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
263 
264   PetscFunctionBegin;
265   *bs = baij->bs;
266   PetscFunctionReturn(0);
267 }
268 
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatDestroy_SeqBAIJ"
272 int MatDestroy_SeqBAIJ(Mat A)
273 {
274   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
275   int         ierr;
276 
277   PetscFunctionBegin;
278 #if defined(PETSC_USE_LOG)
279   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
280 #endif
281   ierr = PetscFree(a->a);CHKERRQ(ierr);
282   if (!a->singlemalloc) {
283     ierr = PetscFree(a->i);CHKERRQ(ierr);
284     ierr = PetscFree(a->j);CHKERRQ(ierr);
285   }
286   if (a->row) {
287     ierr = ISDestroy(a->row);CHKERRQ(ierr);
288   }
289   if (a->col) {
290     ierr = ISDestroy(a->col);CHKERRQ(ierr);
291   }
292   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
293   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
294   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
295   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
296   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
297   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
298   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
299 #if defined(PETSC_USE_MAT_SINGLE)
300   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
301 #endif
302   ierr = PetscFree(a);CHKERRQ(ierr);
303   PetscFunctionReturn(0);
304 }
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "MatSetOption_SeqBAIJ"
308 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
309 {
310   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
311 
312   PetscFunctionBegin;
313   switch (op) {
314   case MAT_ROW_ORIENTED:
315     a->roworiented    = PETSC_TRUE;
316     break;
317   case MAT_COLUMN_ORIENTED:
318     a->roworiented    = PETSC_FALSE;
319     break;
320   case MAT_COLUMNS_SORTED:
321     a->sorted         = PETSC_TRUE;
322     break;
323   case MAT_COLUMNS_UNSORTED:
324     a->sorted         = PETSC_FALSE;
325     break;
326   case MAT_KEEP_ZEROED_ROWS:
327     a->keepzeroedrows = PETSC_TRUE;
328     break;
329   case MAT_NO_NEW_NONZERO_LOCATIONS:
330     a->nonew          = 1;
331     break;
332   case MAT_NEW_NONZERO_LOCATION_ERR:
333     a->nonew          = -1;
334     break;
335   case MAT_NEW_NONZERO_ALLOCATION_ERR:
336     a->nonew          = -2;
337     break;
338   case MAT_YES_NEW_NONZERO_LOCATIONS:
339     a->nonew          = 0;
340     break;
341   case MAT_ROWS_SORTED:
342   case MAT_ROWS_UNSORTED:
343   case MAT_YES_NEW_DIAGONALS:
344   case MAT_IGNORE_OFF_PROC_ENTRIES:
345   case MAT_USE_HASH_TABLE:
346     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
347     break;
348   case MAT_NO_NEW_DIAGONALS:
349     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
350   case MAT_USE_SINGLE_PRECISION_SOLVES:
351     if (a->bs==4) {
352       PetscTruth sse_enabled_local,sse_enabled_global;
353       int        ierr;
354 
355       sse_enabled_local  = PETSC_FALSE;
356       sse_enabled_global = PETSC_FALSE;
357 
358       ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr);
359 #if defined(PETSC_HAVE_SSE)
360       if (sse_enabled_local) {
361           a->single_precision_solves = PETSC_TRUE;
362           A->ops->solve              = MatSolve_SeqBAIJ_Update;
363           A->ops->solvetranspose     = MatSolveTranspose_SeqBAIJ_Update;
364           PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n");
365           break;
366       } else {
367         PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
368       }
369 #else
370       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
371 #endif
372     }
373     break;
374   default:
375     SETERRQ(PETSC_ERR_SUP,"unknown option");
376   }
377   PetscFunctionReturn(0);
378 }
379 
380 #undef __FUNCT__
381 #define __FUNCT__ "MatGetRow_SeqBAIJ"
382 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
383 {
384   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
385   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
386   MatScalar    *aa,*aa_i;
387   PetscScalar  *v_i;
388 
389   PetscFunctionBegin;
390   bs  = a->bs;
391   ai  = a->i;
392   aj  = a->j;
393   aa  = a->a;
394   bs2 = a->bs2;
395 
396   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
397 
398   bn  = row/bs;   /* Block number */
399   bp  = row % bs; /* Block Position */
400   M   = ai[bn+1] - ai[bn];
401   *nz = bs*M;
402 
403   if (v) {
404     *v = 0;
405     if (*nz) {
406       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
407       for (i=0; i<M; i++) { /* for each block in the block row */
408         v_i  = *v + i*bs;
409         aa_i = aa + bs2*(ai[bn] + i);
410         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
411       }
412     }
413   }
414 
415   if (idx) {
416     *idx = 0;
417     if (*nz) {
418       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
419       for (i=0; i<M; i++) { /* for each block in the block row */
420         idx_i = *idx + i*bs;
421         itmp  = bs*aj[ai[bn] + i];
422         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
423       }
424     }
425   }
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
431 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
432 {
433   int ierr;
434 
435   PetscFunctionBegin;
436   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
437   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
438   PetscFunctionReturn(0);
439 }
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "MatTranspose_SeqBAIJ"
443 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
444 {
445   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
446   Mat         C;
447   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
448   int         *rows,*cols,bs2=a->bs2;
449   PetscScalar *array;
450 
451   PetscFunctionBegin;
452   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
453   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
454   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
455 
456 #if defined(PETSC_USE_MAT_SINGLE)
457   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
458   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
459 #else
460   array = a->a;
461 #endif
462 
463   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
464   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
465   ierr = PetscFree(col);CHKERRQ(ierr);
466   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
467   cols = rows + bs;
468   for (i=0; i<mbs; i++) {
469     cols[0] = i*bs;
470     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
471     len = ai[i+1] - ai[i];
472     for (j=0; j<len; j++) {
473       rows[0] = (*aj++)*bs;
474       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
475       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
476       array += bs2;
477     }
478   }
479   ierr = PetscFree(rows);CHKERRQ(ierr);
480 #if defined(PETSC_USE_MAT_SINGLE)
481   ierr = PetscFree(array);
482 #endif
483 
484   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
485   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
486 
487   if (B) {
488     *B = C;
489   } else {
490     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
491   }
492   PetscFunctionReturn(0);
493 }
494 
495 #undef __FUNCT__
496 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
497 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
498 {
499   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
500   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
501   PetscScalar *aa;
502   FILE        *file;
503 
504   PetscFunctionBegin;
505   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
506   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
507   col_lens[0] = MAT_FILE_COOKIE;
508 
509   col_lens[1] = A->m;
510   col_lens[2] = A->n;
511   col_lens[3] = a->nz*bs2;
512 
513   /* store lengths of each row and write (including header) to file */
514   count = 0;
515   for (i=0; i<a->mbs; i++) {
516     for (j=0; j<bs; j++) {
517       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
518     }
519   }
520   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
521   ierr = PetscFree(col_lens);CHKERRQ(ierr);
522 
523   /* store column indices (zero start index) */
524   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
525   count = 0;
526   for (i=0; i<a->mbs; i++) {
527     for (j=0; j<bs; j++) {
528       for (k=a->i[i]; k<a->i[i+1]; k++) {
529         for (l=0; l<bs; l++) {
530           jj[count++] = bs*a->j[k] + l;
531         }
532       }
533     }
534   }
535   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
536   ierr = PetscFree(jj);CHKERRQ(ierr);
537 
538   /* store nonzero values */
539   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
540   count = 0;
541   for (i=0; i<a->mbs; i++) {
542     for (j=0; j<bs; j++) {
543       for (k=a->i[i]; k<a->i[i+1]; k++) {
544         for (l=0; l<bs; l++) {
545           aa[count++] = a->a[bs2*k + l*bs + j];
546         }
547       }
548     }
549   }
550   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
551   ierr = PetscFree(aa);CHKERRQ(ierr);
552 
553   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
554   if (file) {
555     fprintf(file,"-matload_block_size %d\n",a->bs);
556   }
557   PetscFunctionReturn(0);
558 }
559 
560 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer);
561 
562 #undef __FUNCT__
563 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
564 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
565 {
566   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
567   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
568   PetscViewerFormat format;
569 
570   PetscFunctionBegin;
571   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
572   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
573     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
574   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
575     Mat aij;
576     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
577     ierr = MatView(aij,viewer);CHKERRQ(ierr);
578     ierr = MatDestroy(aij);CHKERRQ(ierr);
579   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
580 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
581      ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr);
582 #endif
583      PetscFunctionReturn(0);
584   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
585     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
586     for (i=0; i<a->mbs; i++) {
587       for (j=0; j<bs; j++) {
588         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
589         for (k=a->i[i]; k<a->i[i+1]; k++) {
590           for (l=0; l<bs; l++) {
591 #if defined(PETSC_USE_COMPLEX)
592             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
593               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
594                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
595             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
596               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
597                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
598             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
599               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
600             }
601 #else
602             if (a->a[bs2*k + l*bs + j] != 0.0) {
603               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
604             }
605 #endif
606           }
607         }
608         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
609       }
610     }
611     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
612   } else {
613     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
614     for (i=0; i<a->mbs; i++) {
615       for (j=0; j<bs; j++) {
616         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
617         for (k=a->i[i]; k<a->i[i+1]; k++) {
618           for (l=0; l<bs; l++) {
619 #if defined(PETSC_USE_COMPLEX)
620             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
621               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
622                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
623             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
624               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
625                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
626             } else {
627               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
628             }
629 #else
630             ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
631 #endif
632           }
633         }
634         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
635       }
636     }
637     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
638   }
639   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
640   PetscFunctionReturn(0);
641 }
642 
643 #undef __FUNCT__
644 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
645 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
646 {
647   Mat          A = (Mat) Aa;
648   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
649   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
650   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
651   MatScalar    *aa;
652   PetscViewer  viewer;
653 
654   PetscFunctionBegin;
655 
656   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
657   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
658 
659   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
660 
661   /* loop over matrix elements drawing boxes */
662   color = PETSC_DRAW_BLUE;
663   for (i=0,row=0; i<mbs; i++,row+=bs) {
664     for (j=a->i[i]; j<a->i[i+1]; j++) {
665       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
666       x_l = a->j[j]*bs; x_r = x_l + 1.0;
667       aa = a->a + j*bs2;
668       for (k=0; k<bs; k++) {
669         for (l=0; l<bs; l++) {
670           if (PetscRealPart(*aa++) >=  0.) continue;
671           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
672         }
673       }
674     }
675   }
676   color = PETSC_DRAW_CYAN;
677   for (i=0,row=0; i<mbs; i++,row+=bs) {
678     for (j=a->i[i]; j<a->i[i+1]; j++) {
679       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
680       x_l = a->j[j]*bs; x_r = x_l + 1.0;
681       aa = a->a + j*bs2;
682       for (k=0; k<bs; k++) {
683         for (l=0; l<bs; l++) {
684           if (PetscRealPart(*aa++) != 0.) continue;
685           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
686         }
687       }
688     }
689   }
690 
691   color = PETSC_DRAW_RED;
692   for (i=0,row=0; i<mbs; i++,row+=bs) {
693     for (j=a->i[i]; j<a->i[i+1]; j++) {
694       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
695       x_l = a->j[j]*bs; x_r = x_l + 1.0;
696       aa = a->a + j*bs2;
697       for (k=0; k<bs; k++) {
698         for (l=0; l<bs; l++) {
699           if (PetscRealPart(*aa++) <= 0.) continue;
700           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
701         }
702       }
703     }
704   }
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
710 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
711 {
712   int          ierr;
713   PetscReal    xl,yl,xr,yr,w,h;
714   PetscDraw    draw;
715   PetscTruth   isnull;
716 
717   PetscFunctionBegin;
718 
719   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
720   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
721 
722   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
723   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
724   xr += w;    yr += h;  xl = -w;     yl = -h;
725   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
726   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
727   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatView_SeqBAIJ"
733 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
734 {
735   int        ierr;
736   PetscTruth issocket,isascii,isbinary,isdraw;
737 
738   PetscFunctionBegin;
739   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
740   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
741   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
742   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
743   if (issocket) {
744     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
745   } else if (isascii){
746     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
747   } else if (isbinary) {
748     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
749   } else if (isdraw) {
750     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
751   } else {
752     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
753   }
754   PetscFunctionReturn(0);
755 }
756 
757 
758 #undef __FUNCT__
759 #define __FUNCT__ "MatGetValues_SeqBAIJ"
760 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
761 {
762   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
763   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
764   int        *ai = a->i,*ailen = a->ilen;
765   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
766   MatScalar  *ap,*aa = a->a,zero = 0.0;
767 
768   PetscFunctionBegin;
769   for (k=0; k<m; k++) { /* loop over rows */
770     row  = im[k]; brow = row/bs;
771     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
772     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
773     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
774     nrow = ailen[brow];
775     for (l=0; l<n; l++) { /* loop over columns */
776       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
777       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
778       col  = in[l] ;
779       bcol = col/bs;
780       cidx = col%bs;
781       ridx = row%bs;
782       high = nrow;
783       low  = 0; /* assume unsorted */
784       while (high-low > 5) {
785         t = (low+high)/2;
786         if (rp[t] > bcol) high = t;
787         else             low  = t;
788       }
789       for (i=low; i<high; i++) {
790         if (rp[i] > bcol) break;
791         if (rp[i] == bcol) {
792           *v++ = ap[bs2*i+bs*cidx+ridx];
793           goto finished;
794         }
795       }
796       *v++ = zero;
797       finished:;
798     }
799   }
800   PetscFunctionReturn(0);
801 }
802 
803 #if defined(PETSC_USE_MAT_SINGLE)
804 #undef __FUNCT__
805 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
806 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
807 {
808   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
809   int         ierr,i,N = m*n*b->bs2;
810   MatScalar   *vsingle;
811 
812   PetscFunctionBegin;
813   if (N > b->setvalueslen) {
814     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
815     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
816     b->setvalueslen  = N;
817   }
818   vsingle = b->setvaluescopy;
819   for (i=0; i<N; i++) {
820     vsingle[i] = v[i];
821   }
822   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
823   PetscFunctionReturn(0);
824 }
825 #endif
826 
827 
828 #undef __FUNCT__
829 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
830 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
831 {
832   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
833   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
834   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
835   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
836   PetscTruth  roworiented=a->roworiented;
837   MatScalar   *value = v,*ap,*aa = a->a,*bap;
838 
839   PetscFunctionBegin;
840   if (roworiented) {
841     stepval = (n-1)*bs;
842   } else {
843     stepval = (m-1)*bs;
844   }
845   for (k=0; k<m; k++) { /* loop over added rows */
846     row  = im[k];
847     if (row < 0) continue;
848 #if defined(PETSC_USE_BOPT_g)
849     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
850 #endif
851     rp   = aj + ai[row];
852     ap   = aa + bs2*ai[row];
853     rmax = imax[row];
854     nrow = ailen[row];
855     low  = 0;
856     for (l=0; l<n; l++) { /* loop over added columns */
857       if (in[l] < 0) continue;
858 #if defined(PETSC_USE_BOPT_g)
859       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
860 #endif
861       col = in[l];
862       if (roworiented) {
863         value = v + k*(stepval+bs)*bs + l*bs;
864       } else {
865         value = v + l*(stepval+bs)*bs + k*bs;
866       }
867       if (!sorted) low = 0; high = nrow;
868       while (high-low > 7) {
869         t = (low+high)/2;
870         if (rp[t] > col) high = t;
871         else             low  = t;
872       }
873       for (i=low; i<high; i++) {
874         if (rp[i] > col) break;
875         if (rp[i] == col) {
876           bap  = ap +  bs2*i;
877           if (roworiented) {
878             if (is == ADD_VALUES) {
879               for (ii=0; ii<bs; ii++,value+=stepval) {
880                 for (jj=ii; jj<bs2; jj+=bs) {
881                   bap[jj] += *value++;
882                 }
883               }
884             } else {
885               for (ii=0; ii<bs; ii++,value+=stepval) {
886                 for (jj=ii; jj<bs2; jj+=bs) {
887                   bap[jj] = *value++;
888                 }
889               }
890             }
891           } else {
892             if (is == ADD_VALUES) {
893               for (ii=0; ii<bs; ii++,value+=stepval) {
894                 for (jj=0; jj<bs; jj++) {
895                   *bap++ += *value++;
896                 }
897               }
898             } else {
899               for (ii=0; ii<bs; ii++,value+=stepval) {
900                 for (jj=0; jj<bs; jj++) {
901                   *bap++  = *value++;
902                 }
903               }
904             }
905           }
906           goto noinsert2;
907         }
908       }
909       if (nonew == 1) goto noinsert2;
910       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
911       if (nrow >= rmax) {
912         /* there is no extra room in row, therefore enlarge */
913         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
914         MatScalar *new_a;
915 
916         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
917 
918         /* malloc new storage space */
919         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
920 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
921         new_j   = (int*)(new_a + bs2*new_nz);
922         new_i   = new_j + new_nz;
923 
924         /* copy over old data into new slots */
925         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
926         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
927         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
928         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
929         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
930         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
931         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
932         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
933         /* free up old matrix storage */
934         ierr = PetscFree(a->a);CHKERRQ(ierr);
935         if (!a->singlemalloc) {
936           ierr = PetscFree(a->i);CHKERRQ(ierr);
937           ierr = PetscFree(a->j);CHKERRQ(ierr);
938         }
939         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
940         a->singlemalloc = PETSC_TRUE;
941 
942         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
943         rmax = imax[row] = imax[row] + CHUNKSIZE;
944         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
945         a->maxnz += bs2*CHUNKSIZE;
946         a->reallocs++;
947         a->nz++;
948       }
949       N = nrow++ - 1;
950       /* shift up all the later entries in this row */
951       for (ii=N; ii>=i; ii--) {
952         rp[ii+1] = rp[ii];
953         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
954       }
955       if (N >= i) {
956         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
957       }
958       rp[i] = col;
959       bap   = ap +  bs2*i;
960       if (roworiented) {
961         for (ii=0; ii<bs; ii++,value+=stepval) {
962           for (jj=ii; jj<bs2; jj+=bs) {
963             bap[jj] = *value++;
964           }
965         }
966       } else {
967         for (ii=0; ii<bs; ii++,value+=stepval) {
968           for (jj=0; jj<bs; jj++) {
969             *bap++  = *value++;
970           }
971         }
972       }
973       noinsert2:;
974       low = i;
975     }
976     ailen[row] = nrow;
977   }
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
983 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
984 {
985   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
986   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
987   int        m = A->m,*ip,N,*ailen = a->ilen;
988   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
989   MatScalar  *aa = a->a,*ap;
990 #if defined(PETSC_HAVE_DSCPACK)
991   PetscTruth   flag;
992 #endif
993 
994   PetscFunctionBegin;
995   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
996 
997   if (m) rmax = ailen[0];
998   for (i=1; i<mbs; i++) {
999     /* move each row back by the amount of empty slots (fshift) before it*/
1000     fshift += imax[i-1] - ailen[i-1];
1001     rmax   = PetscMax(rmax,ailen[i]);
1002     if (fshift) {
1003       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1004       N = ailen[i];
1005       for (j=0; j<N; j++) {
1006         ip[j-fshift] = ip[j];
1007         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1008       }
1009     }
1010     ai[i] = ai[i-1] + ailen[i-1];
1011   }
1012   if (mbs) {
1013     fshift += imax[mbs-1] - ailen[mbs-1];
1014     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1015   }
1016   /* reset ilen and imax for each row */
1017   for (i=0; i<mbs; i++) {
1018     ailen[i] = imax[i] = ai[i+1] - ai[i];
1019   }
1020   a->nz = ai[mbs];
1021 
1022   /* diagonals may have moved, so kill the diagonal pointers */
1023   if (fshift && a->diag) {
1024     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1025     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
1026     a->diag = 0;
1027   }
1028   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1029   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
1030   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
1031   a->reallocs          = 0;
1032   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1033 
1034 #if defined(PETSC_HAVE_DSCPACK)
1035   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr);
1036   if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); }
1037 #endif
1038 
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 
1043 
1044 /*
1045    This function returns an array of flags which indicate the locations of contiguous
1046    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1047    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1048    Assume: sizes should be long enough to hold all the values.
1049 */
1050 #undef __FUNCT__
1051 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1052 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1053 {
1054   int        i,j,k,row;
1055   PetscTruth flg;
1056 
1057   PetscFunctionBegin;
1058   for (i=0,j=0; i<n; j++) {
1059     row = idx[i];
1060     if (row%bs!=0) { /* Not the begining of a block */
1061       sizes[j] = 1;
1062       i++;
1063     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1064       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1065       i++;
1066     } else { /* Begining of the block, so check if the complete block exists */
1067       flg = PETSC_TRUE;
1068       for (k=1; k<bs; k++) {
1069         if (row+k != idx[i+k]) { /* break in the block */
1070           flg = PETSC_FALSE;
1071           break;
1072         }
1073       }
1074       if (flg == PETSC_TRUE) { /* No break in the bs */
1075         sizes[j] = bs;
1076         i+= bs;
1077       } else {
1078         sizes[j] = 1;
1079         i++;
1080       }
1081     }
1082   }
1083   *bs_max = j;
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1089 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag)
1090 {
1091   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1092   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
1093   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1094   PetscScalar zero = 0.0;
1095   MatScalar   *aa;
1096 
1097   PetscFunctionBegin;
1098   /* Make a copy of the IS and  sort it */
1099   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1100   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1101 
1102   /* allocate memory for rows,sizes */
1103   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
1104   sizes = rows + is_n;
1105 
1106   /* copy IS values to rows, and sort them */
1107   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1108   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1109   if (baij->keepzeroedrows) {
1110     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1111     bs_max = is_n;
1112   } else {
1113     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1114   }
1115   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1116 
1117   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1118     row   = rows[j];
1119     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1120     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1121     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1122     if (sizes[i] == bs && !baij->keepzeroedrows) {
1123       if (diag) {
1124         if (baij->ilen[row/bs] > 0) {
1125           baij->ilen[row/bs]       = 1;
1126           baij->j[baij->i[row/bs]] = row/bs;
1127           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1128         }
1129         /* Now insert all the diagonal values for this bs */
1130         for (k=0; k<bs; k++) {
1131           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1132         }
1133       } else { /* (!diag) */
1134         baij->ilen[row/bs] = 0;
1135       } /* end (!diag) */
1136     } else { /* (sizes[i] != bs) */
1137 #if defined (PETSC_USE_DEBUG)
1138       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1139 #endif
1140       for (k=0; k<count; k++) {
1141         aa[0] =  zero;
1142         aa    += bs;
1143       }
1144       if (diag) {
1145         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1146       }
1147     }
1148   }
1149 
1150   ierr = PetscFree(rows);CHKERRQ(ierr);
1151   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1157 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
1158 {
1159   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1160   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1161   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1162   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1163   int         ridx,cidx,bs2=a->bs2,ierr;
1164   PetscTruth  roworiented=a->roworiented;
1165   MatScalar   *ap,value,*aa=a->a,*bap;
1166 
1167   PetscFunctionBegin;
1168   for (k=0; k<m; k++) { /* loop over added rows */
1169     row  = im[k]; brow = row/bs;
1170     if (row < 0) continue;
1171 #if defined(PETSC_USE_BOPT_g)
1172     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1173 #endif
1174     rp   = aj + ai[brow];
1175     ap   = aa + bs2*ai[brow];
1176     rmax = imax[brow];
1177     nrow = ailen[brow];
1178     low  = 0;
1179     for (l=0; l<n; l++) { /* loop over added columns */
1180       if (in[l] < 0) continue;
1181 #if defined(PETSC_USE_BOPT_g)
1182       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1183 #endif
1184       col = in[l]; bcol = col/bs;
1185       ridx = row % bs; cidx = col % bs;
1186       if (roworiented) {
1187         value = v[l + k*n];
1188       } else {
1189         value = v[k + l*m];
1190       }
1191       if (!sorted) low = 0; high = nrow;
1192       while (high-low > 7) {
1193         t = (low+high)/2;
1194         if (rp[t] > bcol) high = t;
1195         else              low  = t;
1196       }
1197       for (i=low; i<high; i++) {
1198         if (rp[i] > bcol) break;
1199         if (rp[i] == bcol) {
1200           bap  = ap +  bs2*i + bs*cidx + ridx;
1201           if (is == ADD_VALUES) *bap += value;
1202           else                  *bap  = value;
1203           goto noinsert1;
1204         }
1205       }
1206       if (nonew == 1) goto noinsert1;
1207       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1208       if (nrow >= rmax) {
1209         /* there is no extra room in row, therefore enlarge */
1210         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1211         MatScalar *new_a;
1212 
1213         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1214 
1215         /* Malloc new storage space */
1216         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1217 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1218         new_j   = (int*)(new_a + bs2*new_nz);
1219         new_i   = new_j + new_nz;
1220 
1221         /* copy over old data into new slots */
1222         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1223         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1224         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1225         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1226         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1227         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1228         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1229         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1230         /* free up old matrix storage */
1231         ierr = PetscFree(a->a);CHKERRQ(ierr);
1232         if (!a->singlemalloc) {
1233           ierr = PetscFree(a->i);CHKERRQ(ierr);
1234           ierr = PetscFree(a->j);CHKERRQ(ierr);
1235         }
1236         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1237         a->singlemalloc = PETSC_TRUE;
1238 
1239         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1240         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1241         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1242         a->maxnz += bs2*CHUNKSIZE;
1243         a->reallocs++;
1244         a->nz++;
1245       }
1246       N = nrow++ - 1;
1247       /* shift up all the later entries in this row */
1248       for (ii=N; ii>=i; ii--) {
1249         rp[ii+1] = rp[ii];
1250         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1251       }
1252       if (N>=i) {
1253         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1254       }
1255       rp[i]                      = bcol;
1256       ap[bs2*i + bs*cidx + ridx] = value;
1257       noinsert1:;
1258       low = i;
1259     }
1260     ailen[brow] = nrow;
1261   }
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1268 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1269 {
1270   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1271   Mat         outA;
1272   int         ierr;
1273   PetscTruth  row_identity,col_identity;
1274 
1275   PetscFunctionBegin;
1276   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1277   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1278   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1279   if (!row_identity || !col_identity) {
1280     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1281   }
1282 
1283   outA          = inA;
1284   inA->factor   = FACTOR_LU;
1285 
1286   if (!a->diag) {
1287     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1288   }
1289 
1290   a->row        = row;
1291   a->col        = col;
1292   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1293   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1294 
1295   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1296   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1297   PetscLogObjectParent(inA,a->icol);
1298 
1299   /*
1300       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1301       for ILU(0) factorization with natural ordering
1302   */
1303   if (a->bs < 8) {
1304     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1305   } else {
1306     if (!a->solve_work) {
1307       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1308       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1309     }
1310   }
1311 
1312   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1313 
1314   PetscFunctionReturn(0);
1315 }
1316 #undef __FUNCT__
1317 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1318 int MatPrintHelp_SeqBAIJ(Mat A)
1319 {
1320   static PetscTruth called = PETSC_FALSE;
1321   MPI_Comm          comm = A->comm;
1322   int               ierr;
1323 
1324   PetscFunctionBegin;
1325   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1326   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1327   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 EXTERN_C_BEGIN
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1334 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1335 {
1336   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1337   int         i,nz,nbs;
1338 
1339   PetscFunctionBegin;
1340   nz  = baij->maxnz/baij->bs2;
1341   nbs = baij->nbs;
1342   for (i=0; i<nz; i++) {
1343     baij->j[i] = indices[i];
1344   }
1345   baij->nz = nz;
1346   for (i=0; i<nbs; i++) {
1347     baij->ilen[i] = baij->imax[i];
1348   }
1349 
1350   PetscFunctionReturn(0);
1351 }
1352 EXTERN_C_END
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1356 /*@
1357     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1358        in the matrix.
1359 
1360   Input Parameters:
1361 +  mat - the SeqBAIJ matrix
1362 -  indices - the column indices
1363 
1364   Level: advanced
1365 
1366   Notes:
1367     This can be called if you have precomputed the nonzero structure of the
1368   matrix and want to provide it to the matrix object to improve the performance
1369   of the MatSetValues() operation.
1370 
1371     You MUST have set the correct numbers of nonzeros per row in the call to
1372   MatCreateSeqBAIJ().
1373 
1374     MUST be called before any calls to MatSetValues();
1375 
1376 @*/
1377 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1378 {
1379   int ierr,(*f)(Mat,int *);
1380 
1381   PetscFunctionBegin;
1382   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1383   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1384   if (f) {
1385     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1386   } else {
1387     SETERRQ(1,"Wrong type of matrix to set column indices");
1388   }
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 #undef __FUNCT__
1393 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1394 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1395 {
1396   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1397   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1398   PetscReal    atmp;
1399   PetscScalar  *x,zero = 0.0;
1400   MatScalar    *aa;
1401   int          ncols,brow,krow,kcol;
1402 
1403   PetscFunctionBegin;
1404   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1405   bs   = a->bs;
1406   aa   = a->a;
1407   ai   = a->i;
1408   aj   = a->j;
1409   mbs = a->mbs;
1410 
1411   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1412   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1413   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1414   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1415   for (i=0; i<mbs; i++) {
1416     ncols = ai[1] - ai[0]; ai++;
1417     brow  = bs*i;
1418     for (j=0; j<ncols; j++){
1419       /* bcol = bs*(*aj); */
1420       for (kcol=0; kcol<bs; kcol++){
1421         for (krow=0; krow<bs; krow++){
1422           atmp = PetscAbsScalar(*aa); aa++;
1423           row = brow + krow;    /* row index */
1424           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1425           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1426         }
1427       }
1428       aj++;
1429     }
1430   }
1431   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 #undef __FUNCT__
1436 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1437 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1438 {
1439   int        ierr;
1440 
1441   PetscFunctionBegin;
1442   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1448 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array)
1449 {
1450   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1451   PetscFunctionBegin;
1452   *array = a->a;
1453   PetscFunctionReturn(0);
1454 }
1455 
1456 #undef __FUNCT__
1457 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1458 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array)
1459 {
1460   PetscFunctionBegin;
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 #include "petscblaslapack.h"
1465 #undef __FUNCT__
1466 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1467 int MatAXPY_SeqBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1468 {
1469   int          ierr,one=1;
1470   Mat_SeqBAIJ  *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1471 
1472   PetscFunctionBegin;
1473   if (str == SAME_NONZERO_PATTERN) {
1474     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1475   } else {
1476     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1477   }
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 /* -------------------------------------------------------------------*/
1482 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1483        MatGetRow_SeqBAIJ,
1484        MatRestoreRow_SeqBAIJ,
1485        MatMult_SeqBAIJ_N,
1486        MatMultAdd_SeqBAIJ_N,
1487        MatMultTranspose_SeqBAIJ,
1488        MatMultTransposeAdd_SeqBAIJ,
1489        MatSolve_SeqBAIJ_N,
1490        0,
1491        0,
1492        0,
1493        MatLUFactor_SeqBAIJ,
1494        0,
1495        0,
1496        MatTranspose_SeqBAIJ,
1497        MatGetInfo_SeqBAIJ,
1498        MatEqual_SeqBAIJ,
1499        MatGetDiagonal_SeqBAIJ,
1500        MatDiagonalScale_SeqBAIJ,
1501        MatNorm_SeqBAIJ,
1502        0,
1503        MatAssemblyEnd_SeqBAIJ,
1504        0,
1505        MatSetOption_SeqBAIJ,
1506        MatZeroEntries_SeqBAIJ,
1507        MatZeroRows_SeqBAIJ,
1508        MatLUFactorSymbolic_SeqBAIJ,
1509        MatLUFactorNumeric_SeqBAIJ_N,
1510        0,
1511        0,
1512        MatSetUpPreallocation_SeqBAIJ,
1513        MatILUFactorSymbolic_SeqBAIJ,
1514        0,
1515        MatGetArray_SeqBAIJ,
1516        MatRestoreArray_SeqBAIJ,
1517        MatDuplicate_SeqBAIJ,
1518        0,
1519        0,
1520        MatILUFactor_SeqBAIJ,
1521        0,
1522        MatAXPY_SeqBAIJ,
1523        MatGetSubMatrices_SeqBAIJ,
1524        MatIncreaseOverlap_SeqBAIJ,
1525        MatGetValues_SeqBAIJ,
1526        0,
1527        MatPrintHelp_SeqBAIJ,
1528        MatScale_SeqBAIJ,
1529        0,
1530        0,
1531        0,
1532        MatGetBlockSize_SeqBAIJ,
1533        MatGetRowIJ_SeqBAIJ,
1534        MatRestoreRowIJ_SeqBAIJ,
1535        0,
1536        0,
1537        0,
1538        0,
1539        0,
1540        0,
1541        MatSetValuesBlocked_SeqBAIJ,
1542        MatGetSubMatrix_SeqBAIJ,
1543        MatDestroy_SeqBAIJ,
1544        MatView_SeqBAIJ,
1545        MatGetPetscMaps_Petsc,
1546        0,
1547        0,
1548        0,
1549        0,
1550        0,
1551        0,
1552        MatGetRowMax_SeqBAIJ,
1553        MatConvert_Basic};
1554 
1555 EXTERN_C_BEGIN
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1558 int MatStoreValues_SeqBAIJ(Mat mat)
1559 {
1560   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1561   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1562   int         ierr;
1563 
1564   PetscFunctionBegin;
1565   if (aij->nonew != 1) {
1566     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1567   }
1568 
1569   /* allocate space for values if not already there */
1570   if (!aij->saved_values) {
1571     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1572   }
1573 
1574   /* copy values over */
1575   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1576   PetscFunctionReturn(0);
1577 }
1578 EXTERN_C_END
1579 
1580 EXTERN_C_BEGIN
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1583 int MatRetrieveValues_SeqBAIJ(Mat mat)
1584 {
1585   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1586   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1587 
1588   PetscFunctionBegin;
1589   if (aij->nonew != 1) {
1590     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1591   }
1592   if (!aij->saved_values) {
1593     SETERRQ(1,"Must call MatStoreValues(A);first");
1594   }
1595 
1596   /* copy values over */
1597   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1598   PetscFunctionReturn(0);
1599 }
1600 EXTERN_C_END
1601 
1602 EXTERN_C_BEGIN
1603 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1604 EXTERN_C_END
1605 
1606 EXTERN_C_BEGIN
1607 #undef __FUNCT__
1608 #define __FUNCT__ "MatCreate_SeqBAIJ"
1609 int MatCreate_SeqBAIJ(Mat B)
1610 {
1611   int         ierr,size;
1612   Mat_SeqBAIJ *b;
1613 
1614   PetscFunctionBegin;
1615   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1616   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1617 
1618   B->m = B->M = PetscMax(B->m,B->M);
1619   B->n = B->N = PetscMax(B->n,B->N);
1620   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1621   B->data = (void*)b;
1622   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1623   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1624   B->factor           = 0;
1625   B->lupivotthreshold = 1.0;
1626   B->mapping          = 0;
1627   b->row              = 0;
1628   b->col              = 0;
1629   b->icol             = 0;
1630   b->reallocs         = 0;
1631   b->saved_values     = 0;
1632 #if defined(PETSC_USE_MAT_SINGLE)
1633   b->setvalueslen     = 0;
1634   b->setvaluescopy    = PETSC_NULL;
1635 #endif
1636   b->single_precision_solves = PETSC_FALSE;
1637 
1638   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1639   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1640 
1641   b->sorted           = PETSC_FALSE;
1642   b->roworiented      = PETSC_TRUE;
1643   b->nonew            = 0;
1644   b->diag             = 0;
1645   b->solve_work       = 0;
1646   b->mult_work        = 0;
1647   B->spptr            = 0;
1648   B->info.nz_unneeded = (PetscReal)b->maxnz;
1649   b->keepzeroedrows   = PETSC_FALSE;
1650 
1651   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1652                                      "MatStoreValues_SeqBAIJ",
1653                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1654   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1655                                      "MatRetrieveValues_SeqBAIJ",
1656                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1657   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1658                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1659                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1660   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1661                                      "MatConvert_SeqBAIJ_SeqAIJ",
1662                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1663   PetscFunctionReturn(0);
1664 }
1665 EXTERN_C_END
1666 
1667 #undef __FUNCT__
1668 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1669 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1670 {
1671   Mat         C;
1672   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1673   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1674 
1675   PetscFunctionBegin;
1676   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1677 
1678   *B = 0;
1679   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1680   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1681   c    = (Mat_SeqBAIJ*)C->data;
1682 
1683   c->bs         = a->bs;
1684   c->bs2        = a->bs2;
1685   c->mbs        = a->mbs;
1686   c->nbs        = a->nbs;
1687   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1688 
1689   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1690   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1691   for (i=0; i<mbs; i++) {
1692     c->imax[i] = a->imax[i];
1693     c->ilen[i] = a->ilen[i];
1694   }
1695 
1696   /* allocate the matrix space */
1697   c->singlemalloc = PETSC_TRUE;
1698   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1699   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1700   c->j = (int*)(c->a + nz*bs2);
1701   c->i = c->j + nz;
1702   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1703   if (mbs > 0) {
1704     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1705     if (cpvalues == MAT_COPY_VALUES) {
1706       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1707     } else {
1708       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1709     }
1710   }
1711 
1712   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1713   c->sorted      = a->sorted;
1714   c->roworiented = a->roworiented;
1715   c->nonew       = a->nonew;
1716 
1717   if (a->diag) {
1718     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1719     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1720     for (i=0; i<mbs; i++) {
1721       c->diag[i] = a->diag[i];
1722     }
1723   } else c->diag        = 0;
1724   c->nz                 = a->nz;
1725   c->maxnz              = a->maxnz;
1726   c->solve_work         = 0;
1727   C->spptr              = 0;     /* Dangerous -I'm throwing away a->spptr */
1728   c->mult_work          = 0;
1729   C->preallocated       = PETSC_TRUE;
1730   C->assembled          = PETSC_TRUE;
1731   *B = C;
1732   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 EXTERN_C_BEGIN
1737 #undef __FUNCT__
1738 #define __FUNCT__ "MatLoad_SeqBAIJ"
1739 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1740 {
1741   Mat_SeqBAIJ  *a;
1742   Mat          B;
1743   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1744   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1745   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1746   int          *masked,nmask,tmp,bs2,ishift;
1747   PetscScalar  *aa;
1748   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1749 
1750   PetscFunctionBegin;
1751   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1752   bs2  = bs*bs;
1753 
1754   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1755   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1756   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1757   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1758   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1759   M = header[1]; N = header[2]; nz = header[3];
1760 
1761   if (header[3] < 0) {
1762     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1763   }
1764 
1765   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1766 
1767   /*
1768      This code adds extra rows to make sure the number of rows is
1769     divisible by the blocksize
1770   */
1771   mbs        = M/bs;
1772   extra_rows = bs - M + bs*(mbs);
1773   if (extra_rows == bs) extra_rows = 0;
1774   else                  mbs++;
1775   if (extra_rows) {
1776     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1777   }
1778 
1779   /* read in row lengths */
1780   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1781   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1782   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1783 
1784   /* read in column indices */
1785   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1786   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1787   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1788 
1789   /* loop over row lengths determining block row lengths */
1790   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1791   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1792   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1793   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1794   masked   = mask + mbs;
1795   rowcount = 0; nzcount = 0;
1796   for (i=0; i<mbs; i++) {
1797     nmask = 0;
1798     for (j=0; j<bs; j++) {
1799       kmax = rowlengths[rowcount];
1800       for (k=0; k<kmax; k++) {
1801         tmp = jj[nzcount++]/bs;
1802         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1803       }
1804       rowcount++;
1805     }
1806     browlengths[i] += nmask;
1807     /* zero out the mask elements we set */
1808     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1809   }
1810 
1811   /* create our matrix */
1812   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1813   B = *A;
1814   a = (Mat_SeqBAIJ*)B->data;
1815 
1816   /* set matrix "i" values */
1817   a->i[0] = 0;
1818   for (i=1; i<= mbs; i++) {
1819     a->i[i]      = a->i[i-1] + browlengths[i-1];
1820     a->ilen[i-1] = browlengths[i-1];
1821   }
1822   a->nz         = 0;
1823   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1824 
1825   /* read in nonzero values */
1826   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1827   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1828   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1829 
1830   /* set "a" and "j" values into matrix */
1831   nzcount = 0; jcount = 0;
1832   for (i=0; i<mbs; i++) {
1833     nzcountb = nzcount;
1834     nmask    = 0;
1835     for (j=0; j<bs; j++) {
1836       kmax = rowlengths[i*bs+j];
1837       for (k=0; k<kmax; k++) {
1838         tmp = jj[nzcount++]/bs;
1839 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1840       }
1841     }
1842     /* sort the masked values */
1843     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1844 
1845     /* set "j" values into matrix */
1846     maskcount = 1;
1847     for (j=0; j<nmask; j++) {
1848       a->j[jcount++]  = masked[j];
1849       mask[masked[j]] = maskcount++;
1850     }
1851     /* set "a" values into matrix */
1852     ishift = bs2*a->i[i];
1853     for (j=0; j<bs; j++) {
1854       kmax = rowlengths[i*bs+j];
1855       for (k=0; k<kmax; k++) {
1856         tmp       = jj[nzcountb]/bs ;
1857         block     = mask[tmp] - 1;
1858         point     = jj[nzcountb] - bs*tmp;
1859         idx       = ishift + bs2*block + j + bs*point;
1860         a->a[idx] = (MatScalar)aa[nzcountb++];
1861       }
1862     }
1863     /* zero out the mask elements we set */
1864     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1865   }
1866   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1867 
1868   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1869   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1870   ierr = PetscFree(aa);CHKERRQ(ierr);
1871   ierr = PetscFree(jj);CHKERRQ(ierr);
1872   ierr = PetscFree(mask);CHKERRQ(ierr);
1873 
1874   B->assembled = PETSC_TRUE;
1875 
1876   ierr = MatView_Private(B);CHKERRQ(ierr);
1877   PetscFunctionReturn(0);
1878 }
1879 EXTERN_C_END
1880 
1881 #undef __FUNCT__
1882 #define __FUNCT__ "MatCreateSeqBAIJ"
1883 /*@C
1884    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1885    compressed row) format.  For good matrix assembly performance the
1886    user should preallocate the matrix storage by setting the parameter nz
1887    (or the array nnz).  By setting these parameters accurately, performance
1888    during matrix assembly can be increased by more than a factor of 50.
1889 
1890    Collective on MPI_Comm
1891 
1892    Input Parameters:
1893 +  comm - MPI communicator, set to PETSC_COMM_SELF
1894 .  bs - size of block
1895 .  m - number of rows
1896 .  n - number of columns
1897 .  nz - number of nonzero blocks  per block row (same for all rows)
1898 -  nnz - array containing the number of nonzero blocks in the various block rows
1899          (possibly different for each block row) or PETSC_NULL
1900 
1901    Output Parameter:
1902 .  A - the matrix
1903 
1904    Options Database Keys:
1905 .   -mat_no_unroll - uses code that does not unroll the loops in the
1906                      block calculations (much slower)
1907 .    -mat_block_size - size of the blocks to use
1908 
1909    Level: intermediate
1910 
1911    Notes:
1912    A nonzero block is any block that as 1 or more nonzeros in it
1913 
1914    The block AIJ format is fully compatible with standard Fortran 77
1915    storage.  That is, the stored row and column indices can begin at
1916    either one (as in Fortran) or zero.  See the users' manual for details.
1917 
1918    Specify the preallocated storage with either nz or nnz (not both).
1919    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1920    allocation.  For additional details, see the users manual chapter on
1921    matrices.
1922 
1923 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1924 @*/
1925 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1926 {
1927   int ierr;
1928 
1929   PetscFunctionBegin;
1930   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1931   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
1932   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1933   PetscFunctionReturn(0);
1934 }
1935 
1936 #undef __FUNCT__
1937 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
1938 /*@C
1939    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1940    per row in the matrix. For good matrix assembly performance the
1941    user should preallocate the matrix storage by setting the parameter nz
1942    (or the array nnz).  By setting these parameters accurately, performance
1943    during matrix assembly can be increased by more than a factor of 50.
1944 
1945    Collective on MPI_Comm
1946 
1947    Input Parameters:
1948 +  A - the matrix
1949 .  bs - size of block
1950 .  nz - number of block nonzeros per block row (same for all rows)
1951 -  nnz - array containing the number of block nonzeros in the various block rows
1952          (possibly different for each block row) or PETSC_NULL
1953 
1954    Options Database Keys:
1955 .   -mat_no_unroll - uses code that does not unroll the loops in the
1956                      block calculations (much slower)
1957 .    -mat_block_size - size of the blocks to use
1958 
1959    Level: intermediate
1960 
1961    Notes:
1962    The block AIJ format is fully compatible with standard Fortran 77
1963    storage.  That is, the stored row and column indices can begin at
1964    either one (as in Fortran) or zero.  See the users' manual for details.
1965 
1966    Specify the preallocated storage with either nz or nnz (not both).
1967    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1968    allocation.  For additional details, see the users manual chapter on
1969    matrices.
1970 
1971 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1972 @*/
1973 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1974 {
1975   Mat_SeqBAIJ *b;
1976   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1977   PetscTruth  flg;
1978 
1979   PetscFunctionBegin;
1980   ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr);
1981   if (!flg) PetscFunctionReturn(0);
1982 
1983   B->preallocated = PETSC_TRUE;
1984   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1985   if (nnz && newbs != bs) {
1986     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1987   }
1988   bs   = newbs;
1989 
1990   mbs  = B->m/bs;
1991   nbs  = B->n/bs;
1992   bs2  = bs*bs;
1993 
1994   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1995     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1996   }
1997 
1998   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1999   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
2000   if (nnz) {
2001     for (i=0; i<mbs; i++) {
2002       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
2003       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
2004     }
2005   }
2006 
2007   b       = (Mat_SeqBAIJ*)B->data;
2008   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2009   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2010   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2011   if (!flg) {
2012     switch (bs) {
2013     case 1:
2014       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2015       B->ops->mult            = MatMult_SeqBAIJ_1;
2016       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2017       break;
2018     case 2:
2019       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2020       B->ops->mult            = MatMult_SeqBAIJ_2;
2021       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2022       break;
2023     case 3:
2024       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2025       B->ops->mult            = MatMult_SeqBAIJ_3;
2026       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2027       break;
2028     case 4:
2029       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2030       B->ops->mult            = MatMult_SeqBAIJ_4;
2031       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2032       break;
2033     case 5:
2034       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2035       B->ops->mult            = MatMult_SeqBAIJ_5;
2036       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2037       break;
2038     case 6:
2039       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2040       B->ops->mult            = MatMult_SeqBAIJ_6;
2041       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2042       break;
2043     case 7:
2044       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2045       B->ops->mult            = MatMult_SeqBAIJ_7;
2046       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2047       break;
2048     default:
2049       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2050       B->ops->mult            = MatMult_SeqBAIJ_N;
2051       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2052       break;
2053     }
2054   }
2055   b->bs      = bs;
2056   b->mbs     = mbs;
2057   b->nbs     = nbs;
2058   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
2059   if (!nnz) {
2060     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2061     else if (nz <= 0)        nz = 1;
2062     for (i=0; i<mbs; i++) b->imax[i] = nz;
2063     nz = nz*mbs;
2064   } else {
2065     nz = 0;
2066     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2067   }
2068 
2069   /* allocate the matrix space */
2070   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
2071   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
2072   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2073   b->j  = (int*)(b->a + nz*bs2);
2074   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
2075   b->i  = b->j + nz;
2076   b->singlemalloc = PETSC_TRUE;
2077 
2078   b->i[0] = 0;
2079   for (i=1; i<mbs+1; i++) {
2080     b->i[i] = b->i[i-1] + b->imax[i-1];
2081   }
2082 
2083   /* b->ilen will count nonzeros in each block row so far. */
2084   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
2085   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2086   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2087 
2088   b->bs               = bs;
2089   b->bs2              = bs2;
2090   b->mbs              = mbs;
2091   b->nz               = 0;
2092   b->maxnz            = nz*bs2;
2093   B->info.nz_unneeded = (PetscReal)b->maxnz;
2094   PetscFunctionReturn(0);
2095 }
2096 
2097