xref: /petsc/src/mat/impls/baij/seq/baij.c (revision c8a8475e04bcaa43590892a5c3e60c6f87bc31f7)
1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "petscsys.h"                     /*I "petscmat.h" I*/
11 
12 /*
13     Special version for Fun3d sequential benchmark
14 */
15 #if defined(PETSC_HAVE_FORTRAN_CAPS)
16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
18 #define matsetvaluesblocked4_ matsetvaluesblocked4
19 #endif
20 
21 #undef __FUNCT__
22 #define __FUNCT__ "matsetvaluesblocked4_"
23 void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
24 {
25   Mat         A = *AA;
26   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
27   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
28   int         *ai=a->i,*ailen=a->ilen;
29   int         *aj=a->j,stepval;
30   PetscScalar *value = v;
31   MatScalar   *ap,*aa = a->a,*bap;
32 
33   PetscFunctionBegin;
34   stepval = (n-1)*4;
35   for (k=0; k<m; k++) { /* loop over added rows */
36     row  = im[k];
37     rp   = aj + ai[row];
38     ap   = aa + 16*ai[row];
39     nrow = ailen[row];
40     low  = 0;
41     for (l=0; l<n; l++) { /* loop over added columns */
42       col = in[l];
43       value = v + k*(stepval+4)*4 + l*4;
44       low = 0; high = nrow;
45       while (high-low > 7) {
46         t = (low+high)/2;
47         if (rp[t] > col) high = t;
48         else             low  = t;
49       }
50       for (i=low; i<high; i++) {
51         if (rp[i] > col) break;
52         if (rp[i] == col) {
53           bap  = ap +  16*i;
54           for (ii=0; ii<4; ii++,value+=stepval) {
55             for (jj=ii; jj<16; jj+=4) {
56               bap[jj] += *value++;
57             }
58           }
59           goto noinsert2;
60         }
61       }
62       N = nrow++ - 1;
63       /* shift up all the later entries in this row */
64       for (ii=N; ii>=i; ii--) {
65         rp[ii+1] = rp[ii];
66         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
67       }
68       if (N >= i) {
69         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
70       }
71       rp[i] = col;
72       bap   = ap +  16*i;
73       for (ii=0; ii<4; ii++,value+=stepval) {
74         for (jj=ii; jj<16; jj+=4) {
75           bap[jj] = *value++;
76         }
77       }
78       noinsert2:;
79       low = i;
80     }
81     ailen[row] = nrow;
82   }
83 }
84 
85 #if defined(PETSC_HAVE_FORTRAN_CAPS)
86 #define matsetvalues4_ MATSETVALUES4
87 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
88 #define matsetvalues4_ matsetvalues4
89 #endif
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "MatSetValues4_"
93 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
94 {
95   Mat         A = *AA;
96   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
97   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
98   int         *ai=a->i,*ailen=a->ilen;
99   int         *aj=a->j,brow,bcol;
100   int         ridx,cidx;
101   MatScalar   *ap,value,*aa=a->a,*bap;
102 
103   PetscFunctionBegin;
104   for (k=0; k<m; k++) { /* loop over added rows */
105     row  = im[k]; brow = row/4;
106     rp   = aj + ai[brow];
107     ap   = aa + 16*ai[brow];
108     nrow = ailen[brow];
109     low  = 0;
110     for (l=0; l<n; l++) { /* loop over added columns */
111       col = in[l]; bcol = col/4;
112       ridx = row % 4; cidx = col % 4;
113       value = v[l + k*n];
114       low = 0; high = nrow;
115       while (high-low > 7) {
116         t = (low+high)/2;
117         if (rp[t] > bcol) high = t;
118         else              low  = t;
119       }
120       for (i=low; i<high; i++) {
121         if (rp[i] > bcol) break;
122         if (rp[i] == bcol) {
123           bap  = ap +  16*i + 4*cidx + ridx;
124           *bap += value;
125           goto noinsert1;
126         }
127       }
128       N = nrow++ - 1;
129       /* shift up all the later entries in this row */
130       for (ii=N; ii>=i; ii--) {
131         rp[ii+1] = rp[ii];
132         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
133       }
134       if (N>=i) {
135         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
136       }
137       rp[i]                    = bcol;
138       ap[16*i + 4*cidx + ridx] = value;
139       noinsert1:;
140       low = i;
141     }
142     ailen[brow] = nrow;
143   }
144 }
145 
146 /*  UGLY, ugly, ugly
147    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
148    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
149    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
150    converts the entries into single precision and then calls ..._MatScalar() to put them
151    into the single precision data structures.
152 */
153 #if defined(PETSC_USE_MAT_SINGLE)
154 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
155 #else
156 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
157 #endif
158 #if defined(PETSC_HAVE_DSCPACK)
159 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat);
160 #endif
161 
162 #define CHUNKSIZE  10
163 
164 /*
165      Checks for missing diagonals
166 */
167 #undef __FUNCT__
168 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
169 int MatMissingDiagonal_SeqBAIJ(Mat A)
170 {
171   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
172   int         *diag,*jj = a->j,i,ierr;
173 
174   PetscFunctionBegin;
175   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
176   diag = a->diag;
177   for (i=0; i<a->mbs; i++) {
178     if (jj[diag[i]] != i) {
179       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
180     }
181   }
182   PetscFunctionReturn(0);
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
187 int MatMarkDiagonal_SeqBAIJ(Mat A)
188 {
189   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
190   int         i,j,*diag,m = a->mbs,ierr;
191 
192   PetscFunctionBegin;
193   if (a->diag) PetscFunctionReturn(0);
194 
195   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
196   PetscLogObjectMemory(A,(m+1)*sizeof(int));
197   for (i=0; i<m; i++) {
198     diag[i] = a->i[i+1];
199     for (j=a->i[i]; j<a->i[i+1]; j++) {
200       if (a->j[j] == i) {
201         diag[i] = j;
202         break;
203       }
204     }
205   }
206   a->diag = diag;
207   PetscFunctionReturn(0);
208 }
209 
210 
211 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
212 
213 #undef __FUNCT__
214 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
215 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
216 {
217   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
218   int         ierr,n = a->mbs,i;
219 
220   PetscFunctionBegin;
221   *nn = n;
222   if (!ia) PetscFunctionReturn(0);
223   if (symmetric) {
224     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
225   } else if (oshift == 1) {
226     /* temporarily add 1 to i and j indices */
227     int nz = a->i[n];
228     for (i=0; i<nz; i++) a->j[i]++;
229     for (i=0; i<n+1; i++) a->i[i]++;
230     *ia = a->i; *ja = a->j;
231   } else {
232     *ia = a->i; *ja = a->j;
233   }
234 
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
240 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
241 {
242   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
243   int         i,n = a->mbs,ierr;
244 
245   PetscFunctionBegin;
246   if (!ia) PetscFunctionReturn(0);
247   if (symmetric) {
248     ierr = PetscFree(*ia);CHKERRQ(ierr);
249     ierr = PetscFree(*ja);CHKERRQ(ierr);
250   } else if (oshift == 1) {
251     int nz = a->i[n]-1;
252     for (i=0; i<nz; i++) a->j[i]--;
253     for (i=0; i<n+1; i++) a->i[i]--;
254   }
255   PetscFunctionReturn(0);
256 }
257 
258 #undef __FUNCT__
259 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
260 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
261 {
262   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
263 
264   PetscFunctionBegin;
265   *bs = baij->bs;
266   PetscFunctionReturn(0);
267 }
268 
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatDestroy_SeqBAIJ"
272 int MatDestroy_SeqBAIJ(Mat A)
273 {
274   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
275   int         ierr;
276 
277   PetscFunctionBegin;
278 #if defined(PETSC_USE_LOG)
279   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
280 #endif
281   ierr = PetscFree(a->a);CHKERRQ(ierr);
282   if (!a->singlemalloc) {
283     ierr = PetscFree(a->i);CHKERRQ(ierr);
284     ierr = PetscFree(a->j);CHKERRQ(ierr);
285   }
286   if (a->row) {
287     ierr = ISDestroy(a->row);CHKERRQ(ierr);
288   }
289   if (a->col) {
290     ierr = ISDestroy(a->col);CHKERRQ(ierr);
291   }
292   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
293   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
294   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
295   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
296   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
297   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
298   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
299 #if defined(PETSC_USE_MAT_SINGLE)
300   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
301 #endif
302   ierr = PetscFree(a);CHKERRQ(ierr);
303   PetscFunctionReturn(0);
304 }
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "MatSetOption_SeqBAIJ"
308 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
309 {
310   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
311 
312   PetscFunctionBegin;
313   switch (op) {
314   case MAT_ROW_ORIENTED:
315     a->roworiented    = PETSC_TRUE;
316     break;
317   case MAT_COLUMN_ORIENTED:
318     a->roworiented    = PETSC_FALSE;
319     break;
320   case MAT_COLUMNS_SORTED:
321     a->sorted         = PETSC_TRUE;
322     break;
323   case MAT_COLUMNS_UNSORTED:
324     a->sorted         = PETSC_FALSE;
325     break;
326   case MAT_KEEP_ZEROED_ROWS:
327     a->keepzeroedrows = PETSC_TRUE;
328     break;
329   case MAT_NO_NEW_NONZERO_LOCATIONS:
330     a->nonew          = 1;
331     break;
332   case MAT_NEW_NONZERO_LOCATION_ERR:
333     a->nonew          = -1;
334     break;
335   case MAT_NEW_NONZERO_ALLOCATION_ERR:
336     a->nonew          = -2;
337     break;
338   case MAT_YES_NEW_NONZERO_LOCATIONS:
339     a->nonew          = 0;
340     break;
341   case MAT_ROWS_SORTED:
342   case MAT_ROWS_UNSORTED:
343   case MAT_YES_NEW_DIAGONALS:
344   case MAT_IGNORE_OFF_PROC_ENTRIES:
345   case MAT_USE_HASH_TABLE:
346     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
347     break;
348   case MAT_NO_NEW_DIAGONALS:
349     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
350   case MAT_USE_SINGLE_PRECISION_SOLVES:
351     if (a->bs==4) {
352       PetscTruth sse_enabled_local,sse_enabled_global;
353       int        ierr;
354 
355       sse_enabled_local  = PETSC_FALSE;
356       sse_enabled_global = PETSC_FALSE;
357 
358       ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr);
359 #if defined(PETSC_HAVE_SSE)
360       if (sse_enabled_local) {
361           a->single_precision_solves = PETSC_TRUE;
362           A->ops->solve              = MatSolve_SeqBAIJ_Update;
363           A->ops->solvetranspose     = MatSolveTranspose_SeqBAIJ_Update;
364           PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n");
365           break;
366       } else {
367         PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
368       }
369 #else
370       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
371 #endif
372     }
373     break;
374   default:
375     SETERRQ(PETSC_ERR_SUP,"unknown option");
376   }
377   PetscFunctionReturn(0);
378 }
379 
380 #undef __FUNCT__
381 #define __FUNCT__ "MatGetRow_SeqBAIJ"
382 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
383 {
384   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
385   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
386   MatScalar    *aa,*aa_i;
387   PetscScalar  *v_i;
388 
389   PetscFunctionBegin;
390   bs  = a->bs;
391   ai  = a->i;
392   aj  = a->j;
393   aa  = a->a;
394   bs2 = a->bs2;
395 
396   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
397 
398   bn  = row/bs;   /* Block number */
399   bp  = row % bs; /* Block Position */
400   M   = ai[bn+1] - ai[bn];
401   *nz = bs*M;
402 
403   if (v) {
404     *v = 0;
405     if (*nz) {
406       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
407       for (i=0; i<M; i++) { /* for each block in the block row */
408         v_i  = *v + i*bs;
409         aa_i = aa + bs2*(ai[bn] + i);
410         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
411       }
412     }
413   }
414 
415   if (idx) {
416     *idx = 0;
417     if (*nz) {
418       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
419       for (i=0; i<M; i++) { /* for each block in the block row */
420         idx_i = *idx + i*bs;
421         itmp  = bs*aj[ai[bn] + i];
422         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
423       }
424     }
425   }
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
431 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
432 {
433   int ierr;
434 
435   PetscFunctionBegin;
436   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
437   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
438   PetscFunctionReturn(0);
439 }
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "MatTranspose_SeqBAIJ"
443 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
444 {
445   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
446   Mat         C;
447   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
448   int         *rows,*cols,bs2=a->bs2;
449   PetscScalar *array;
450 
451   PetscFunctionBegin;
452   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
453   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
454   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
455 
456 #if defined(PETSC_USE_MAT_SINGLE)
457   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
458   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
459 #else
460   array = a->a;
461 #endif
462 
463   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
464   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
465   ierr = PetscFree(col);CHKERRQ(ierr);
466   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
467   cols = rows + bs;
468   for (i=0; i<mbs; i++) {
469     cols[0] = i*bs;
470     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
471     len = ai[i+1] - ai[i];
472     for (j=0; j<len; j++) {
473       rows[0] = (*aj++)*bs;
474       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
475       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
476       array += bs2;
477     }
478   }
479   ierr = PetscFree(rows);CHKERRQ(ierr);
480 #if defined(PETSC_USE_MAT_SINGLE)
481   ierr = PetscFree(array);
482 #endif
483 
484   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
485   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
486 
487   if (B) {
488     *B = C;
489   } else {
490     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
491   }
492   PetscFunctionReturn(0);
493 }
494 
495 #undef __FUNCT__
496 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
497 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
498 {
499   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
500   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
501   PetscScalar *aa;
502   FILE        *file;
503 
504   PetscFunctionBegin;
505   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
506   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
507   col_lens[0] = MAT_FILE_COOKIE;
508 
509   col_lens[1] = A->m;
510   col_lens[2] = A->n;
511   col_lens[3] = a->nz*bs2;
512 
513   /* store lengths of each row and write (including header) to file */
514   count = 0;
515   for (i=0; i<a->mbs; i++) {
516     for (j=0; j<bs; j++) {
517       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
518     }
519   }
520   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
521   ierr = PetscFree(col_lens);CHKERRQ(ierr);
522 
523   /* store column indices (zero start index) */
524   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
525   count = 0;
526   for (i=0; i<a->mbs; i++) {
527     for (j=0; j<bs; j++) {
528       for (k=a->i[i]; k<a->i[i+1]; k++) {
529         for (l=0; l<bs; l++) {
530           jj[count++] = bs*a->j[k] + l;
531         }
532       }
533     }
534   }
535   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
536   ierr = PetscFree(jj);CHKERRQ(ierr);
537 
538   /* store nonzero values */
539   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
540   count = 0;
541   for (i=0; i<a->mbs; i++) {
542     for (j=0; j<bs; j++) {
543       for (k=a->i[i]; k<a->i[i+1]; k++) {
544         for (l=0; l<bs; l++) {
545           aa[count++] = a->a[bs2*k + l*bs + j];
546         }
547       }
548     }
549   }
550   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
551   ierr = PetscFree(aa);CHKERRQ(ierr);
552 
553   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
554   if (file) {
555     fprintf(file,"-matload_block_size %d\n",a->bs);
556   }
557   PetscFunctionReturn(0);
558 }
559 
560 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer);
561 
562 #undef __FUNCT__
563 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
564 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
565 {
566   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
567   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
568   PetscViewerFormat format;
569 
570   PetscFunctionBegin;
571   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
572   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
573     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
574   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
575     Mat aij;
576     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
577     ierr = MatView(aij,viewer);CHKERRQ(ierr);
578     ierr = MatDestroy(aij);CHKERRQ(ierr);
579   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
580 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
581      ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr);
582 #endif
583      PetscFunctionReturn(0);
584   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
585     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
586     for (i=0; i<a->mbs; i++) {
587       for (j=0; j<bs; j++) {
588         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
589         for (k=a->i[i]; k<a->i[i+1]; k++) {
590           for (l=0; l<bs; l++) {
591 #if defined(PETSC_USE_COMPLEX)
592             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
593               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
594                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
595             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
596               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
597                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
598             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
599               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
600             }
601 #else
602             if (a->a[bs2*k + l*bs + j] != 0.0) {
603               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
604             }
605 #endif
606           }
607         }
608         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
609       }
610     }
611     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
612   } else {
613     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
614     for (i=0; i<a->mbs; i++) {
615       for (j=0; j<bs; j++) {
616         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
617         for (k=a->i[i]; k<a->i[i+1]; k++) {
618           for (l=0; l<bs; l++) {
619 #if defined(PETSC_USE_COMPLEX)
620             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
621               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
622                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
623             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
624               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
625                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
626             } else {
627               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
628             }
629 #else
630             ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
631 #endif
632           }
633         }
634         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
635       }
636     }
637     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
638   }
639   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
640   PetscFunctionReturn(0);
641 }
642 
643 #undef __FUNCT__
644 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
645 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
646 {
647   Mat          A = (Mat) Aa;
648   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
649   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
650   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
651   MatScalar    *aa;
652   PetscViewer  viewer;
653 
654   PetscFunctionBegin;
655 
656   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
657   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
658 
659   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
660 
661   /* loop over matrix elements drawing boxes */
662   color = PETSC_DRAW_BLUE;
663   for (i=0,row=0; i<mbs; i++,row+=bs) {
664     for (j=a->i[i]; j<a->i[i+1]; j++) {
665       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
666       x_l = a->j[j]*bs; x_r = x_l + 1.0;
667       aa = a->a + j*bs2;
668       for (k=0; k<bs; k++) {
669         for (l=0; l<bs; l++) {
670           if (PetscRealPart(*aa++) >=  0.) continue;
671           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
672         }
673       }
674     }
675   }
676   color = PETSC_DRAW_CYAN;
677   for (i=0,row=0; i<mbs; i++,row+=bs) {
678     for (j=a->i[i]; j<a->i[i+1]; j++) {
679       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
680       x_l = a->j[j]*bs; x_r = x_l + 1.0;
681       aa = a->a + j*bs2;
682       for (k=0; k<bs; k++) {
683         for (l=0; l<bs; l++) {
684           if (PetscRealPart(*aa++) != 0.) continue;
685           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
686         }
687       }
688     }
689   }
690 
691   color = PETSC_DRAW_RED;
692   for (i=0,row=0; i<mbs; i++,row+=bs) {
693     for (j=a->i[i]; j<a->i[i+1]; j++) {
694       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
695       x_l = a->j[j]*bs; x_r = x_l + 1.0;
696       aa = a->a + j*bs2;
697       for (k=0; k<bs; k++) {
698         for (l=0; l<bs; l++) {
699           if (PetscRealPart(*aa++) <= 0.) continue;
700           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
701         }
702       }
703     }
704   }
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
710 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
711 {
712   int          ierr;
713   PetscReal    xl,yl,xr,yr,w,h;
714   PetscDraw    draw;
715   PetscTruth   isnull;
716 
717   PetscFunctionBegin;
718 
719   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
720   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
721 
722   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
723   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
724   xr += w;    yr += h;  xl = -w;     yl = -h;
725   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
726   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
727   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatView_SeqBAIJ"
733 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
734 {
735   int        ierr;
736   PetscTruth issocket,isascii,isbinary,isdraw;
737 
738   PetscFunctionBegin;
739   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
740   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
741   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
742   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
743   if (issocket) {
744     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
745   } else if (isascii){
746     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
747   } else if (isbinary) {
748     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
749   } else if (isdraw) {
750     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
751   } else {
752     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
753   }
754   PetscFunctionReturn(0);
755 }
756 
757 
758 #undef __FUNCT__
759 #define __FUNCT__ "MatGetValues_SeqBAIJ"
760 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
761 {
762   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
763   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
764   int        *ai = a->i,*ailen = a->ilen;
765   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
766   MatScalar  *ap,*aa = a->a,zero = 0.0;
767 
768   PetscFunctionBegin;
769   for (k=0; k<m; k++) { /* loop over rows */
770     row  = im[k]; brow = row/bs;
771     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
772     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
773     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
774     nrow = ailen[brow];
775     for (l=0; l<n; l++) { /* loop over columns */
776       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
777       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
778       col  = in[l] ;
779       bcol = col/bs;
780       cidx = col%bs;
781       ridx = row%bs;
782       high = nrow;
783       low  = 0; /* assume unsorted */
784       while (high-low > 5) {
785         t = (low+high)/2;
786         if (rp[t] > bcol) high = t;
787         else             low  = t;
788       }
789       for (i=low; i<high; i++) {
790         if (rp[i] > bcol) break;
791         if (rp[i] == bcol) {
792           *v++ = ap[bs2*i+bs*cidx+ridx];
793           goto finished;
794         }
795       }
796       *v++ = zero;
797       finished:;
798     }
799   }
800   PetscFunctionReturn(0);
801 }
802 
803 #if defined(PETSC_USE_MAT_SINGLE)
804 #undef __FUNCT__
805 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
806 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
807 {
808   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
809   int         ierr,i,N = m*n*b->bs2;
810   MatScalar   *vsingle;
811 
812   PetscFunctionBegin;
813   if (N > b->setvalueslen) {
814     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
815     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
816     b->setvalueslen  = N;
817   }
818   vsingle = b->setvaluescopy;
819   for (i=0; i<N; i++) {
820     vsingle[i] = v[i];
821   }
822   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
823   PetscFunctionReturn(0);
824 }
825 #endif
826 
827 
828 #undef __FUNCT__
829 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
830 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
831 {
832   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
833   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
834   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
835   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
836   PetscTruth  roworiented=a->roworiented;
837   MatScalar   *value = v,*ap,*aa = a->a,*bap;
838 
839   PetscFunctionBegin;
840   if (roworiented) {
841     stepval = (n-1)*bs;
842   } else {
843     stepval = (m-1)*bs;
844   }
845   for (k=0; k<m; k++) { /* loop over added rows */
846     row  = im[k];
847     if (row < 0) continue;
848 #if defined(PETSC_USE_BOPT_g)
849     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
850 #endif
851     rp   = aj + ai[row];
852     ap   = aa + bs2*ai[row];
853     rmax = imax[row];
854     nrow = ailen[row];
855     low  = 0;
856     for (l=0; l<n; l++) { /* loop over added columns */
857       if (in[l] < 0) continue;
858 #if defined(PETSC_USE_BOPT_g)
859       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
860 #endif
861       col = in[l];
862       if (roworiented) {
863         value = v + k*(stepval+bs)*bs + l*bs;
864       } else {
865         value = v + l*(stepval+bs)*bs + k*bs;
866       }
867       if (!sorted) low = 0; high = nrow;
868       while (high-low > 7) {
869         t = (low+high)/2;
870         if (rp[t] > col) high = t;
871         else             low  = t;
872       }
873       for (i=low; i<high; i++) {
874         if (rp[i] > col) break;
875         if (rp[i] == col) {
876           bap  = ap +  bs2*i;
877           if (roworiented) {
878             if (is == ADD_VALUES) {
879               for (ii=0; ii<bs; ii++,value+=stepval) {
880                 for (jj=ii; jj<bs2; jj+=bs) {
881                   bap[jj] += *value++;
882                 }
883               }
884             } else {
885               for (ii=0; ii<bs; ii++,value+=stepval) {
886                 for (jj=ii; jj<bs2; jj+=bs) {
887                   bap[jj] = *value++;
888                 }
889               }
890             }
891           } else {
892             if (is == ADD_VALUES) {
893               for (ii=0; ii<bs; ii++,value+=stepval) {
894                 for (jj=0; jj<bs; jj++) {
895                   *bap++ += *value++;
896                 }
897               }
898             } else {
899               for (ii=0; ii<bs; ii++,value+=stepval) {
900                 for (jj=0; jj<bs; jj++) {
901                   *bap++  = *value++;
902                 }
903               }
904             }
905           }
906           goto noinsert2;
907         }
908       }
909       if (nonew == 1) goto noinsert2;
910       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
911       if (nrow >= rmax) {
912         /* there is no extra room in row, therefore enlarge */
913         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
914         MatScalar *new_a;
915 
916         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
917 
918         /* malloc new storage space */
919         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
920 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
921         new_j   = (int*)(new_a + bs2*new_nz);
922         new_i   = new_j + new_nz;
923 
924         /* copy over old data into new slots */
925         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
926         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
927         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
928         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
929         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
930         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
931         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
932         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
933         /* free up old matrix storage */
934         ierr = PetscFree(a->a);CHKERRQ(ierr);
935         if (!a->singlemalloc) {
936           ierr = PetscFree(a->i);CHKERRQ(ierr);
937           ierr = PetscFree(a->j);CHKERRQ(ierr);
938         }
939         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
940         a->singlemalloc = PETSC_TRUE;
941 
942         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
943         rmax = imax[row] = imax[row] + CHUNKSIZE;
944         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
945         a->maxnz += bs2*CHUNKSIZE;
946         a->reallocs++;
947         a->nz++;
948       }
949       N = nrow++ - 1;
950       /* shift up all the later entries in this row */
951       for (ii=N; ii>=i; ii--) {
952         rp[ii+1] = rp[ii];
953         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
954       }
955       if (N >= i) {
956         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
957       }
958       rp[i] = col;
959       bap   = ap +  bs2*i;
960       if (roworiented) {
961         for (ii=0; ii<bs; ii++,value+=stepval) {
962           for (jj=ii; jj<bs2; jj+=bs) {
963             bap[jj] = *value++;
964           }
965         }
966       } else {
967         for (ii=0; ii<bs; ii++,value+=stepval) {
968           for (jj=0; jj<bs; jj++) {
969             *bap++  = *value++;
970           }
971         }
972       }
973       noinsert2:;
974       low = i;
975     }
976     ailen[row] = nrow;
977   }
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
983 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
984 {
985   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
986   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
987   int        m = A->m,*ip,N,*ailen = a->ilen;
988   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
989   MatScalar  *aa = a->a,*ap;
990 #if defined(PETSC_HAVE_DSCPACK)
991   PetscTruth   flag;
992 #endif
993 
994   PetscFunctionBegin;
995   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
996 
997   if (m) rmax = ailen[0];
998   for (i=1; i<mbs; i++) {
999     /* move each row back by the amount of empty slots (fshift) before it*/
1000     fshift += imax[i-1] - ailen[i-1];
1001     rmax   = PetscMax(rmax,ailen[i]);
1002     if (fshift) {
1003       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1004       N = ailen[i];
1005       for (j=0; j<N; j++) {
1006         ip[j-fshift] = ip[j];
1007         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1008       }
1009     }
1010     ai[i] = ai[i-1] + ailen[i-1];
1011   }
1012   if (mbs) {
1013     fshift += imax[mbs-1] - ailen[mbs-1];
1014     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1015   }
1016   /* reset ilen and imax for each row */
1017   for (i=0; i<mbs; i++) {
1018     ailen[i] = imax[i] = ai[i+1] - ai[i];
1019   }
1020   a->nz = ai[mbs];
1021 
1022   /* diagonals may have moved, so kill the diagonal pointers */
1023   if (fshift && a->diag) {
1024     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1025     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
1026     a->diag = 0;
1027   }
1028   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1029   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
1030   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
1031   a->reallocs          = 0;
1032   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1033 
1034 #if defined(PETSC_HAVE_DSCPACK)
1035   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr);
1036   if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); }
1037 #endif
1038 
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 
1043 
1044 /*
1045    This function returns an array of flags which indicate the locations of contiguous
1046    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1047    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1048    Assume: sizes should be long enough to hold all the values.
1049 */
1050 #undef __FUNCT__
1051 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1052 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1053 {
1054   int        i,j,k,row;
1055   PetscTruth flg;
1056 
1057   PetscFunctionBegin;
1058   for (i=0,j=0; i<n; j++) {
1059     row = idx[i];
1060     if (row%bs!=0) { /* Not the begining of a block */
1061       sizes[j] = 1;
1062       i++;
1063     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1064       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1065       i++;
1066     } else { /* Begining of the block, so check if the complete block exists */
1067       flg = PETSC_TRUE;
1068       for (k=1; k<bs; k++) {
1069         if (row+k != idx[i+k]) { /* break in the block */
1070           flg = PETSC_FALSE;
1071           break;
1072         }
1073       }
1074       if (flg == PETSC_TRUE) { /* No break in the bs */
1075         sizes[j] = bs;
1076         i+= bs;
1077       } else {
1078         sizes[j] = 1;
1079         i++;
1080       }
1081     }
1082   }
1083   *bs_max = j;
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1089 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag)
1090 {
1091   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1092   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
1093   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1094   PetscScalar zero = 0.0;
1095   MatScalar   *aa;
1096 
1097   PetscFunctionBegin;
1098   /* Make a copy of the IS and  sort it */
1099   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1100   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1101 
1102   /* allocate memory for rows,sizes */
1103   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
1104   sizes = rows + is_n;
1105 
1106   /* copy IS values to rows, and sort them */
1107   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1108   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1109   if (baij->keepzeroedrows) {
1110     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1111     bs_max = is_n;
1112   } else {
1113     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1114   }
1115   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1116 
1117   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1118     row   = rows[j];
1119     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1120     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1121     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1122     if (sizes[i] == bs && !baij->keepzeroedrows) {
1123       if (diag) {
1124         if (baij->ilen[row/bs] > 0) {
1125           baij->ilen[row/bs]       = 1;
1126           baij->j[baij->i[row/bs]] = row/bs;
1127           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1128         }
1129         /* Now insert all the diagonal values for this bs */
1130         for (k=0; k<bs; k++) {
1131           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1132         }
1133       } else { /* (!diag) */
1134         baij->ilen[row/bs] = 0;
1135       } /* end (!diag) */
1136     } else { /* (sizes[i] != bs) */
1137 #if defined (PETSC_USE_DEBUG)
1138       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1139 #endif
1140       for (k=0; k<count; k++) {
1141         aa[0] =  zero;
1142         aa    += bs;
1143       }
1144       if (diag) {
1145         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1146       }
1147     }
1148   }
1149 
1150   ierr = PetscFree(rows);CHKERRQ(ierr);
1151   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1157 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
1158 {
1159   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1160   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1161   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1162   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1163   int         ridx,cidx,bs2=a->bs2,ierr;
1164   PetscTruth  roworiented=a->roworiented;
1165   MatScalar   *ap,value,*aa=a->a,*bap;
1166 
1167   PetscFunctionBegin;
1168   for (k=0; k<m; k++) { /* loop over added rows */
1169     row  = im[k]; brow = row/bs;
1170     if (row < 0) continue;
1171 #if defined(PETSC_USE_BOPT_g)
1172     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1173 #endif
1174     rp   = aj + ai[brow];
1175     ap   = aa + bs2*ai[brow];
1176     rmax = imax[brow];
1177     nrow = ailen[brow];
1178     low  = 0;
1179     for (l=0; l<n; l++) { /* loop over added columns */
1180       if (in[l] < 0) continue;
1181 #if defined(PETSC_USE_BOPT_g)
1182       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1183 #endif
1184       col = in[l]; bcol = col/bs;
1185       ridx = row % bs; cidx = col % bs;
1186       if (roworiented) {
1187         value = v[l + k*n];
1188       } else {
1189         value = v[k + l*m];
1190       }
1191       if (!sorted) low = 0; high = nrow;
1192       while (high-low > 7) {
1193         t = (low+high)/2;
1194         if (rp[t] > bcol) high = t;
1195         else              low  = t;
1196       }
1197       for (i=low; i<high; i++) {
1198         if (rp[i] > bcol) break;
1199         if (rp[i] == bcol) {
1200           bap  = ap +  bs2*i + bs*cidx + ridx;
1201           if (is == ADD_VALUES) *bap += value;
1202           else                  *bap  = value;
1203           goto noinsert1;
1204         }
1205       }
1206       if (nonew == 1) goto noinsert1;
1207       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1208       if (nrow >= rmax) {
1209         /* there is no extra room in row, therefore enlarge */
1210         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1211         MatScalar *new_a;
1212 
1213         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1214 
1215         /* Malloc new storage space */
1216         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1217 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1218         new_j   = (int*)(new_a + bs2*new_nz);
1219         new_i   = new_j + new_nz;
1220 
1221         /* copy over old data into new slots */
1222         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1223         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1224         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1225         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1226         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1227         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1228         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1229         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1230         /* free up old matrix storage */
1231         ierr = PetscFree(a->a);CHKERRQ(ierr);
1232         if (!a->singlemalloc) {
1233           ierr = PetscFree(a->i);CHKERRQ(ierr);
1234           ierr = PetscFree(a->j);CHKERRQ(ierr);
1235         }
1236         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1237         a->singlemalloc = PETSC_TRUE;
1238 
1239         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1240         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1241         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1242         a->maxnz += bs2*CHUNKSIZE;
1243         a->reallocs++;
1244         a->nz++;
1245       }
1246       N = nrow++ - 1;
1247       /* shift up all the later entries in this row */
1248       for (ii=N; ii>=i; ii--) {
1249         rp[ii+1] = rp[ii];
1250         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1251       }
1252       if (N>=i) {
1253         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1254       }
1255       rp[i]                      = bcol;
1256       ap[bs2*i + bs*cidx + ridx] = value;
1257       noinsert1:;
1258       low = i;
1259     }
1260     ailen[brow] = nrow;
1261   }
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1268 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1269 {
1270   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1271   Mat         outA;
1272   int         ierr;
1273   PetscTruth  row_identity,col_identity;
1274 
1275   PetscFunctionBegin;
1276   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1277   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1278   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1279   if (!row_identity || !col_identity) {
1280     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1281   }
1282 
1283   outA          = inA;
1284   inA->factor   = FACTOR_LU;
1285 
1286   if (!a->diag) {
1287     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1288   }
1289 
1290   a->row        = row;
1291   a->col        = col;
1292   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1293   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1294 
1295   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1296   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1297   PetscLogObjectParent(inA,a->icol);
1298 
1299   /*
1300       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1301       for ILU(0) factorization with natural ordering
1302   */
1303   if (a->bs < 8) {
1304     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1305   } else {
1306     if (!a->solve_work) {
1307       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1308       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1309     }
1310   }
1311 
1312   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1313 
1314   PetscFunctionReturn(0);
1315 }
1316 #undef __FUNCT__
1317 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1318 int MatPrintHelp_SeqBAIJ(Mat A)
1319 {
1320   static PetscTruth called = PETSC_FALSE;
1321   MPI_Comm          comm = A->comm;
1322   int               ierr;
1323 
1324   PetscFunctionBegin;
1325   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1326   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1327   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 EXTERN_C_BEGIN
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1334 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1335 {
1336   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1337   int         i,nz,nbs;
1338 
1339   PetscFunctionBegin;
1340   nz  = baij->maxnz/baij->bs2;
1341   nbs = baij->nbs;
1342   for (i=0; i<nz; i++) {
1343     baij->j[i] = indices[i];
1344   }
1345   baij->nz = nz;
1346   for (i=0; i<nbs; i++) {
1347     baij->ilen[i] = baij->imax[i];
1348   }
1349 
1350   PetscFunctionReturn(0);
1351 }
1352 EXTERN_C_END
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1356 /*@
1357     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1358        in the matrix.
1359 
1360   Input Parameters:
1361 +  mat - the SeqBAIJ matrix
1362 -  indices - the column indices
1363 
1364   Level: advanced
1365 
1366   Notes:
1367     This can be called if you have precomputed the nonzero structure of the
1368   matrix and want to provide it to the matrix object to improve the performance
1369   of the MatSetValues() operation.
1370 
1371     You MUST have set the correct numbers of nonzeros per row in the call to
1372   MatCreateSeqBAIJ().
1373 
1374     MUST be called before any calls to MatSetValues();
1375 
1376 @*/
1377 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1378 {
1379   int ierr,(*f)(Mat,int *);
1380 
1381   PetscFunctionBegin;
1382   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1383   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1384   if (f) {
1385     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1386   } else {
1387     SETERRQ(1,"Wrong type of matrix to set column indices");
1388   }
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 #undef __FUNCT__
1393 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1394 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1395 {
1396   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1397   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1398   PetscReal    atmp;
1399   PetscScalar  *x,zero = 0.0;
1400   MatScalar    *aa;
1401   int          ncols,brow,krow,kcol;
1402 
1403   PetscFunctionBegin;
1404   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1405   bs   = a->bs;
1406   aa   = a->a;
1407   ai   = a->i;
1408   aj   = a->j;
1409   mbs = a->mbs;
1410 
1411   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1412   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1413   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1414   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1415   for (i=0; i<mbs; i++) {
1416     ncols = ai[1] - ai[0]; ai++;
1417     brow  = bs*i;
1418     for (j=0; j<ncols; j++){
1419       /* bcol = bs*(*aj); */
1420       for (kcol=0; kcol<bs; kcol++){
1421         for (krow=0; krow<bs; krow++){
1422           atmp = PetscAbsScalar(*aa); aa++;
1423           row = brow + krow;    /* row index */
1424           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1425           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1426         }
1427       }
1428       aj++;
1429     }
1430   }
1431   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 #undef __FUNCT__
1436 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1437 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1438 {
1439   int        ierr;
1440 
1441   PetscFunctionBegin;
1442   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1448 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array)
1449 {
1450   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1451   PetscFunctionBegin;
1452   *array = a->a;
1453   PetscFunctionReturn(0);
1454 }
1455 
1456 #undef __FUNCT__
1457 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1458 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array)
1459 {
1460   PetscFunctionBegin;
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 /* -------------------------------------------------------------------*/
1465 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1466        MatGetRow_SeqBAIJ,
1467        MatRestoreRow_SeqBAIJ,
1468        MatMult_SeqBAIJ_N,
1469        MatMultAdd_SeqBAIJ_N,
1470        MatMultTranspose_SeqBAIJ,
1471        MatMultTransposeAdd_SeqBAIJ,
1472        MatSolve_SeqBAIJ_N,
1473        0,
1474        0,
1475        0,
1476        MatLUFactor_SeqBAIJ,
1477        0,
1478        0,
1479        MatTranspose_SeqBAIJ,
1480        MatGetInfo_SeqBAIJ,
1481        MatEqual_SeqBAIJ,
1482        MatGetDiagonal_SeqBAIJ,
1483        MatDiagonalScale_SeqBAIJ,
1484        MatNorm_SeqBAIJ,
1485        0,
1486        MatAssemblyEnd_SeqBAIJ,
1487        0,
1488        MatSetOption_SeqBAIJ,
1489        MatZeroEntries_SeqBAIJ,
1490        MatZeroRows_SeqBAIJ,
1491        MatLUFactorSymbolic_SeqBAIJ,
1492        MatLUFactorNumeric_SeqBAIJ_N,
1493        0,
1494        0,
1495        MatSetUpPreallocation_SeqBAIJ,
1496        MatILUFactorSymbolic_SeqBAIJ,
1497        0,
1498        MatGetArray_SeqBAIJ,
1499        MatRestoreArray_SeqBAIJ,
1500        MatDuplicate_SeqBAIJ,
1501        0,
1502        0,
1503        MatILUFactor_SeqBAIJ,
1504        0,
1505        0,
1506        MatGetSubMatrices_SeqBAIJ,
1507        MatIncreaseOverlap_SeqBAIJ,
1508        MatGetValues_SeqBAIJ,
1509        0,
1510        MatPrintHelp_SeqBAIJ,
1511        MatScale_SeqBAIJ,
1512        0,
1513        0,
1514        0,
1515        MatGetBlockSize_SeqBAIJ,
1516        MatGetRowIJ_SeqBAIJ,
1517        MatRestoreRowIJ_SeqBAIJ,
1518        0,
1519        0,
1520        0,
1521        0,
1522        0,
1523        0,
1524        MatSetValuesBlocked_SeqBAIJ,
1525        MatGetSubMatrix_SeqBAIJ,
1526        MatDestroy_SeqBAIJ,
1527        MatView_SeqBAIJ,
1528        MatGetPetscMaps_Petsc,
1529        0,
1530        0,
1531        0,
1532        0,
1533        0,
1534        0,
1535        MatGetRowMax_SeqBAIJ,
1536        MatConvert_Basic};
1537 
1538 EXTERN_C_BEGIN
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1541 int MatStoreValues_SeqBAIJ(Mat mat)
1542 {
1543   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1544   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1545   int         ierr;
1546 
1547   PetscFunctionBegin;
1548   if (aij->nonew != 1) {
1549     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1550   }
1551 
1552   /* allocate space for values if not already there */
1553   if (!aij->saved_values) {
1554     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1555   }
1556 
1557   /* copy values over */
1558   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1559   PetscFunctionReturn(0);
1560 }
1561 EXTERN_C_END
1562 
1563 EXTERN_C_BEGIN
1564 #undef __FUNCT__
1565 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1566 int MatRetrieveValues_SeqBAIJ(Mat mat)
1567 {
1568   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1569   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1570 
1571   PetscFunctionBegin;
1572   if (aij->nonew != 1) {
1573     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1574   }
1575   if (!aij->saved_values) {
1576     SETERRQ(1,"Must call MatStoreValues(A);first");
1577   }
1578 
1579   /* copy values over */
1580   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1581   PetscFunctionReturn(0);
1582 }
1583 EXTERN_C_END
1584 
1585 EXTERN_C_BEGIN
1586 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1587 EXTERN_C_END
1588 
1589 EXTERN_C_BEGIN
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatCreate_SeqBAIJ"
1592 int MatCreate_SeqBAIJ(Mat B)
1593 {
1594   int         ierr,size;
1595   Mat_SeqBAIJ *b;
1596 
1597   PetscFunctionBegin;
1598   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1599   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1600 
1601   B->m = B->M = PetscMax(B->m,B->M);
1602   B->n = B->N = PetscMax(B->n,B->N);
1603   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1604   B->data = (void*)b;
1605   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1606   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1607   B->factor           = 0;
1608   B->lupivotthreshold = 1.0;
1609   B->mapping          = 0;
1610   b->row              = 0;
1611   b->col              = 0;
1612   b->icol             = 0;
1613   b->reallocs         = 0;
1614   b->saved_values     = 0;
1615 #if defined(PETSC_USE_MAT_SINGLE)
1616   b->setvalueslen     = 0;
1617   b->setvaluescopy    = PETSC_NULL;
1618 #endif
1619   b->single_precision_solves = PETSC_FALSE;
1620 
1621   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1622   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1623 
1624   b->sorted           = PETSC_FALSE;
1625   b->roworiented      = PETSC_TRUE;
1626   b->nonew            = 0;
1627   b->diag             = 0;
1628   b->solve_work       = 0;
1629   b->mult_work        = 0;
1630   B->spptr            = 0;
1631   B->info.nz_unneeded = (PetscReal)b->maxnz;
1632   b->keepzeroedrows   = PETSC_FALSE;
1633 
1634   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1635                                      "MatStoreValues_SeqBAIJ",
1636                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1637   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1638                                      "MatRetrieveValues_SeqBAIJ",
1639                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1640   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1641                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1642                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1643   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1644                                      "MatConvert_SeqBAIJ_SeqAIJ",
1645                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1646   PetscFunctionReturn(0);
1647 }
1648 EXTERN_C_END
1649 
1650 #undef __FUNCT__
1651 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1652 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1653 {
1654   Mat         C;
1655   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1656   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1657 
1658   PetscFunctionBegin;
1659   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1660 
1661   *B = 0;
1662   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1663   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1664   c    = (Mat_SeqBAIJ*)C->data;
1665 
1666   c->bs         = a->bs;
1667   c->bs2        = a->bs2;
1668   c->mbs        = a->mbs;
1669   c->nbs        = a->nbs;
1670   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1671 
1672   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1673   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1674   for (i=0; i<mbs; i++) {
1675     c->imax[i] = a->imax[i];
1676     c->ilen[i] = a->ilen[i];
1677   }
1678 
1679   /* allocate the matrix space */
1680   c->singlemalloc = PETSC_TRUE;
1681   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1682   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1683   c->j = (int*)(c->a + nz*bs2);
1684   c->i = c->j + nz;
1685   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1686   if (mbs > 0) {
1687     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1688     if (cpvalues == MAT_COPY_VALUES) {
1689       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1690     } else {
1691       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1692     }
1693   }
1694 
1695   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1696   c->sorted      = a->sorted;
1697   c->roworiented = a->roworiented;
1698   c->nonew       = a->nonew;
1699 
1700   if (a->diag) {
1701     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1702     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1703     for (i=0; i<mbs; i++) {
1704       c->diag[i] = a->diag[i];
1705     }
1706   } else c->diag        = 0;
1707   c->nz                 = a->nz;
1708   c->maxnz              = a->maxnz;
1709   c->solve_work         = 0;
1710   C->spptr              = 0;     /* Dangerous -I'm throwing away a->spptr */
1711   c->mult_work          = 0;
1712   C->preallocated       = PETSC_TRUE;
1713   C->assembled          = PETSC_TRUE;
1714   *B = C;
1715   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1716   PetscFunctionReturn(0);
1717 }
1718 
1719 EXTERN_C_BEGIN
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatLoad_SeqBAIJ"
1722 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1723 {
1724   Mat_SeqBAIJ  *a;
1725   Mat          B;
1726   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1727   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1728   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1729   int          *masked,nmask,tmp,bs2,ishift;
1730   PetscScalar  *aa;
1731   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1732 
1733   PetscFunctionBegin;
1734   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1735   bs2  = bs*bs;
1736 
1737   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1738   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1739   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1740   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1741   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1742   M = header[1]; N = header[2]; nz = header[3];
1743 
1744   if (header[3] < 0) {
1745     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1746   }
1747 
1748   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1749 
1750   /*
1751      This code adds extra rows to make sure the number of rows is
1752     divisible by the blocksize
1753   */
1754   mbs        = M/bs;
1755   extra_rows = bs - M + bs*(mbs);
1756   if (extra_rows == bs) extra_rows = 0;
1757   else                  mbs++;
1758   if (extra_rows) {
1759     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1760   }
1761 
1762   /* read in row lengths */
1763   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1764   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1765   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1766 
1767   /* read in column indices */
1768   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1769   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1770   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1771 
1772   /* loop over row lengths determining block row lengths */
1773   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1774   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1775   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1776   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1777   masked   = mask + mbs;
1778   rowcount = 0; nzcount = 0;
1779   for (i=0; i<mbs; i++) {
1780     nmask = 0;
1781     for (j=0; j<bs; j++) {
1782       kmax = rowlengths[rowcount];
1783       for (k=0; k<kmax; k++) {
1784         tmp = jj[nzcount++]/bs;
1785         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1786       }
1787       rowcount++;
1788     }
1789     browlengths[i] += nmask;
1790     /* zero out the mask elements we set */
1791     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1792   }
1793 
1794   /* create our matrix */
1795   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1796   B = *A;
1797   a = (Mat_SeqBAIJ*)B->data;
1798 
1799   /* set matrix "i" values */
1800   a->i[0] = 0;
1801   for (i=1; i<= mbs; i++) {
1802     a->i[i]      = a->i[i-1] + browlengths[i-1];
1803     a->ilen[i-1] = browlengths[i-1];
1804   }
1805   a->nz         = 0;
1806   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1807 
1808   /* read in nonzero values */
1809   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1810   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1811   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1812 
1813   /* set "a" and "j" values into matrix */
1814   nzcount = 0; jcount = 0;
1815   for (i=0; i<mbs; i++) {
1816     nzcountb = nzcount;
1817     nmask    = 0;
1818     for (j=0; j<bs; j++) {
1819       kmax = rowlengths[i*bs+j];
1820       for (k=0; k<kmax; k++) {
1821         tmp = jj[nzcount++]/bs;
1822 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1823       }
1824     }
1825     /* sort the masked values */
1826     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1827 
1828     /* set "j" values into matrix */
1829     maskcount = 1;
1830     for (j=0; j<nmask; j++) {
1831       a->j[jcount++]  = masked[j];
1832       mask[masked[j]] = maskcount++;
1833     }
1834     /* set "a" values into matrix */
1835     ishift = bs2*a->i[i];
1836     for (j=0; j<bs; j++) {
1837       kmax = rowlengths[i*bs+j];
1838       for (k=0; k<kmax; k++) {
1839         tmp       = jj[nzcountb]/bs ;
1840         block     = mask[tmp] - 1;
1841         point     = jj[nzcountb] - bs*tmp;
1842         idx       = ishift + bs2*block + j + bs*point;
1843         a->a[idx] = (MatScalar)aa[nzcountb++];
1844       }
1845     }
1846     /* zero out the mask elements we set */
1847     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1848   }
1849   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1850 
1851   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1852   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1853   ierr = PetscFree(aa);CHKERRQ(ierr);
1854   ierr = PetscFree(jj);CHKERRQ(ierr);
1855   ierr = PetscFree(mask);CHKERRQ(ierr);
1856 
1857   B->assembled = PETSC_TRUE;
1858 
1859   ierr = MatView_Private(B);CHKERRQ(ierr);
1860   PetscFunctionReturn(0);
1861 }
1862 EXTERN_C_END
1863 
1864 #undef __FUNCT__
1865 #define __FUNCT__ "MatCreateSeqBAIJ"
1866 /*@C
1867    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1868    compressed row) format.  For good matrix assembly performance the
1869    user should preallocate the matrix storage by setting the parameter nz
1870    (or the array nnz).  By setting these parameters accurately, performance
1871    during matrix assembly can be increased by more than a factor of 50.
1872 
1873    Collective on MPI_Comm
1874 
1875    Input Parameters:
1876 +  comm - MPI communicator, set to PETSC_COMM_SELF
1877 .  bs - size of block
1878 .  m - number of rows
1879 .  n - number of columns
1880 .  nz - number of nonzero blocks  per block row (same for all rows)
1881 -  nnz - array containing the number of nonzero blocks in the various block rows
1882          (possibly different for each block row) or PETSC_NULL
1883 
1884    Output Parameter:
1885 .  A - the matrix
1886 
1887    Options Database Keys:
1888 .   -mat_no_unroll - uses code that does not unroll the loops in the
1889                      block calculations (much slower)
1890 .    -mat_block_size - size of the blocks to use
1891 
1892    Level: intermediate
1893 
1894    Notes:
1895    A nonzero block is any block that as 1 or more nonzeros in it
1896 
1897    The block AIJ format is fully compatible with standard Fortran 77
1898    storage.  That is, the stored row and column indices can begin at
1899    either one (as in Fortran) or zero.  See the users' manual for details.
1900 
1901    Specify the preallocated storage with either nz or nnz (not both).
1902    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1903    allocation.  For additional details, see the users manual chapter on
1904    matrices.
1905 
1906 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1907 @*/
1908 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1909 {
1910   int ierr;
1911 
1912   PetscFunctionBegin;
1913   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1914   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
1915   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1916   PetscFunctionReturn(0);
1917 }
1918 
1919 #undef __FUNCT__
1920 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
1921 /*@C
1922    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1923    per row in the matrix. For good matrix assembly performance the
1924    user should preallocate the matrix storage by setting the parameter nz
1925    (or the array nnz).  By setting these parameters accurately, performance
1926    during matrix assembly can be increased by more than a factor of 50.
1927 
1928    Collective on MPI_Comm
1929 
1930    Input Parameters:
1931 +  A - the matrix
1932 .  bs - size of block
1933 .  nz - number of block nonzeros per block row (same for all rows)
1934 -  nnz - array containing the number of block nonzeros in the various block rows
1935          (possibly different for each block row) or PETSC_NULL
1936 
1937    Options Database Keys:
1938 .   -mat_no_unroll - uses code that does not unroll the loops in the
1939                      block calculations (much slower)
1940 .    -mat_block_size - size of the blocks to use
1941 
1942    Level: intermediate
1943 
1944    Notes:
1945    The block AIJ format is fully compatible with standard Fortran 77
1946    storage.  That is, the stored row and column indices can begin at
1947    either one (as in Fortran) or zero.  See the users' manual for details.
1948 
1949    Specify the preallocated storage with either nz or nnz (not both).
1950    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1951    allocation.  For additional details, see the users manual chapter on
1952    matrices.
1953 
1954 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1955 @*/
1956 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1957 {
1958   Mat_SeqBAIJ *b;
1959   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1960   PetscTruth  flg;
1961 
1962   PetscFunctionBegin;
1963   ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr);
1964   if (!flg) PetscFunctionReturn(0);
1965 
1966   B->preallocated = PETSC_TRUE;
1967   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1968   if (nnz && newbs != bs) {
1969     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1970   }
1971   bs   = newbs;
1972 
1973   mbs  = B->m/bs;
1974   nbs  = B->n/bs;
1975   bs2  = bs*bs;
1976 
1977   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1978     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1979   }
1980 
1981   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1982   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1983   if (nnz) {
1984     for (i=0; i<mbs; i++) {
1985       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1986       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1987     }
1988   }
1989 
1990   b       = (Mat_SeqBAIJ*)B->data;
1991   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1992   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1993   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1994   if (!flg) {
1995     switch (bs) {
1996     case 1:
1997       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1998       B->ops->mult            = MatMult_SeqBAIJ_1;
1999       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2000       break;
2001     case 2:
2002       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2003       B->ops->mult            = MatMult_SeqBAIJ_2;
2004       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2005       break;
2006     case 3:
2007       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2008       B->ops->mult            = MatMult_SeqBAIJ_3;
2009       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2010       break;
2011     case 4:
2012       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2013       B->ops->mult            = MatMult_SeqBAIJ_4;
2014       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2015       break;
2016     case 5:
2017       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2018       B->ops->mult            = MatMult_SeqBAIJ_5;
2019       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2020       break;
2021     case 6:
2022       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2023       B->ops->mult            = MatMult_SeqBAIJ_6;
2024       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2025       break;
2026     case 7:
2027       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2028       B->ops->mult            = MatMult_SeqBAIJ_7;
2029       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2030       break;
2031     default:
2032       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2033       B->ops->mult            = MatMult_SeqBAIJ_N;
2034       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2035       break;
2036     }
2037   }
2038   b->bs      = bs;
2039   b->mbs     = mbs;
2040   b->nbs     = nbs;
2041   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
2042   if (!nnz) {
2043     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2044     else if (nz <= 0)        nz = 1;
2045     for (i=0; i<mbs; i++) b->imax[i] = nz;
2046     nz = nz*mbs;
2047   } else {
2048     nz = 0;
2049     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2050   }
2051 
2052   /* allocate the matrix space */
2053   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
2054   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
2055   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2056   b->j  = (int*)(b->a + nz*bs2);
2057   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
2058   b->i  = b->j + nz;
2059   b->singlemalloc = PETSC_TRUE;
2060 
2061   b->i[0] = 0;
2062   for (i=1; i<mbs+1; i++) {
2063     b->i[i] = b->i[i-1] + b->imax[i-1];
2064   }
2065 
2066   /* b->ilen will count nonzeros in each block row so far. */
2067   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
2068   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2069   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2070 
2071   b->bs               = bs;
2072   b->bs2              = bs2;
2073   b->mbs              = mbs;
2074   b->nz               = 0;
2075   b->maxnz            = nz*bs2;
2076   B->info.nz_unneeded = (PetscReal)b->maxnz;
2077   PetscFunctionReturn(0);
2078 }
2079 
2080