xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 63b9fa5e258ef37d5bcff491ebacde33f30ed49b)
1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "petscsys.h"                     /*I "petscmat.h" I*/
11 
12 /*
13     Special version for Fun3d sequential benchmark
14 */
15 #if defined(PETSC_HAVE_FORTRAN_CAPS)
16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
18 #define matsetvaluesblocked4_ matsetvaluesblocked4
19 #endif
20 
21 EXTERN_C_BEGIN
22 #undef __FUNCT__
23 #define __FUNCT__ "matsetvaluesblocked4_"
24 void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
25 {
26   Mat         A = *AA;
27   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
28   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
29   int         *ai=a->i,*ailen=a->ilen;
30   int         *aj=a->j,stepval;
31   PetscScalar *value = v;
32   MatScalar   *ap,*aa = a->a,*bap;
33 
34   PetscFunctionBegin;
35   stepval = (n-1)*4;
36   for (k=0; k<m; k++) { /* loop over added rows */
37     row  = im[k];
38     rp   = aj + ai[row];
39     ap   = aa + 16*ai[row];
40     nrow = ailen[row];
41     low  = 0;
42     for (l=0; l<n; l++) { /* loop over added columns */
43       col = in[l];
44       value = v + k*(stepval+4)*4 + l*4;
45       low = 0; high = nrow;
46       while (high-low > 7) {
47         t = (low+high)/2;
48         if (rp[t] > col) high = t;
49         else             low  = t;
50       }
51       for (i=low; i<high; i++) {
52         if (rp[i] > col) break;
53         if (rp[i] == col) {
54           bap  = ap +  16*i;
55           for (ii=0; ii<4; ii++,value+=stepval) {
56             for (jj=ii; jj<16; jj+=4) {
57               bap[jj] += *value++;
58             }
59           }
60           goto noinsert2;
61         }
62       }
63       N = nrow++ - 1;
64       /* shift up all the later entries in this row */
65       for (ii=N; ii>=i; ii--) {
66         rp[ii+1] = rp[ii];
67         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
68       }
69       if (N >= i) {
70         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
71       }
72       rp[i] = col;
73       bap   = ap +  16*i;
74       for (ii=0; ii<4; ii++,value+=stepval) {
75         for (jj=ii; jj<16; jj+=4) {
76           bap[jj] = *value++;
77         }
78       }
79       noinsert2:;
80       low = i;
81     }
82     ailen[row] = nrow;
83   }
84 }
85 EXTERN_C_END
86 
87 #if defined(PETSC_HAVE_FORTRAN_CAPS)
88 #define matsetvalues4_ MATSETVALUES4
89 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
90 #define matsetvalues4_ matsetvalues4
91 #endif
92 
93 EXTERN_C_BEGIN
94 #undef __FUNCT__
95 #define __FUNCT__ "MatSetValues4_"
96 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
97 {
98   Mat         A = *AA;
99   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
100   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
101   int         *ai=a->i,*ailen=a->ilen;
102   int         *aj=a->j,brow,bcol;
103   int         ridx,cidx;
104   MatScalar   *ap,value,*aa=a->a,*bap;
105 
106   PetscFunctionBegin;
107   for (k=0; k<m; k++) { /* loop over added rows */
108     row  = im[k]; brow = row/4;
109     rp   = aj + ai[brow];
110     ap   = aa + 16*ai[brow];
111     nrow = ailen[brow];
112     low  = 0;
113     for (l=0; l<n; l++) { /* loop over added columns */
114       col = in[l]; bcol = col/4;
115       ridx = row % 4; cidx = col % 4;
116       value = v[l + k*n];
117       low = 0; high = nrow;
118       while (high-low > 7) {
119         t = (low+high)/2;
120         if (rp[t] > bcol) high = t;
121         else              low  = t;
122       }
123       for (i=low; i<high; i++) {
124         if (rp[i] > bcol) break;
125         if (rp[i] == bcol) {
126           bap  = ap +  16*i + 4*cidx + ridx;
127           *bap += value;
128           goto noinsert1;
129         }
130       }
131       N = nrow++ - 1;
132       /* shift up all the later entries in this row */
133       for (ii=N; ii>=i; ii--) {
134         rp[ii+1] = rp[ii];
135         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
136       }
137       if (N>=i) {
138         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
139       }
140       rp[i]                    = bcol;
141       ap[16*i + 4*cidx + ridx] = value;
142       noinsert1:;
143       low = i;
144     }
145     ailen[brow] = nrow;
146   }
147 }
148 EXTERN_C_END
149 
150 /*  UGLY, ugly, ugly
151    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
152    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
153    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
154    converts the entries into single precision and then calls ..._MatScalar() to put them
155    into the single precision data structures.
156 */
157 #if defined(PETSC_USE_MAT_SINGLE)
158 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
159 #else
160 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
161 #endif
162 #if defined(PETSC_HAVE_DSCPACK)
163 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat);
164 #endif
165 
166 #define CHUNKSIZE  10
167 
168 /*
169      Checks for missing diagonals
170 */
171 #undef __FUNCT__
172 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
173 int MatMissingDiagonal_SeqBAIJ(Mat A)
174 {
175   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
176   int         *diag,*jj = a->j,i,ierr;
177 
178   PetscFunctionBegin;
179   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
180   diag = a->diag;
181   for (i=0; i<a->mbs; i++) {
182     if (jj[diag[i]] != i) {
183       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
184     }
185   }
186   PetscFunctionReturn(0);
187 }
188 
189 #undef __FUNCT__
190 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
191 int MatMarkDiagonal_SeqBAIJ(Mat A)
192 {
193   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
194   int         i,j,*diag,m = a->mbs,ierr;
195 
196   PetscFunctionBegin;
197   if (a->diag) PetscFunctionReturn(0);
198 
199   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
200   PetscLogObjectMemory(A,(m+1)*sizeof(int));
201   for (i=0; i<m; i++) {
202     diag[i] = a->i[i+1];
203     for (j=a->i[i]; j<a->i[i+1]; j++) {
204       if (a->j[j] == i) {
205         diag[i] = j;
206         break;
207       }
208     }
209   }
210   a->diag = diag;
211   PetscFunctionReturn(0);
212 }
213 
214 
215 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
216 
217 #undef __FUNCT__
218 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
219 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
220 {
221   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
222   int         ierr,n = a->mbs,i;
223 
224   PetscFunctionBegin;
225   *nn = n;
226   if (!ia) PetscFunctionReturn(0);
227   if (symmetric) {
228     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
229   } else if (oshift == 1) {
230     /* temporarily add 1 to i and j indices */
231     int nz = a->i[n];
232     for (i=0; i<nz; i++) a->j[i]++;
233     for (i=0; i<n+1; i++) a->i[i]++;
234     *ia = a->i; *ja = a->j;
235   } else {
236     *ia = a->i; *ja = a->j;
237   }
238 
239   PetscFunctionReturn(0);
240 }
241 
242 #undef __FUNCT__
243 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
244 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
245 {
246   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
247   int         i,n = a->mbs,ierr;
248 
249   PetscFunctionBegin;
250   if (!ia) PetscFunctionReturn(0);
251   if (symmetric) {
252     ierr = PetscFree(*ia);CHKERRQ(ierr);
253     ierr = PetscFree(*ja);CHKERRQ(ierr);
254   } else if (oshift == 1) {
255     int nz = a->i[n]-1;
256     for (i=0; i<nz; i++) a->j[i]--;
257     for (i=0; i<n+1; i++) a->i[i]--;
258   }
259   PetscFunctionReturn(0);
260 }
261 
262 #undef __FUNCT__
263 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
264 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
265 {
266   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
267 
268   PetscFunctionBegin;
269   *bs = baij->bs;
270   PetscFunctionReturn(0);
271 }
272 
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatDestroy_SeqBAIJ"
276 int MatDestroy_SeqBAIJ(Mat A)
277 {
278   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
279   int         ierr;
280 
281   PetscFunctionBegin;
282 #if defined(PETSC_USE_LOG)
283   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
284 #endif
285   ierr = PetscFree(a->a);CHKERRQ(ierr);
286   if (!a->singlemalloc) {
287     ierr = PetscFree(a->i);CHKERRQ(ierr);
288     ierr = PetscFree(a->j);CHKERRQ(ierr);
289   }
290   if (a->row) {
291     ierr = ISDestroy(a->row);CHKERRQ(ierr);
292   }
293   if (a->col) {
294     ierr = ISDestroy(a->col);CHKERRQ(ierr);
295   }
296   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
297   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
298   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
299   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
300   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
301   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
302   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
303 #if defined(PETSC_USE_MAT_SINGLE)
304   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
305 #endif
306   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
307 
308   ierr = PetscFree(a);CHKERRQ(ierr);
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "MatSetOption_SeqBAIJ"
314 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
315 {
316   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
317 
318   PetscFunctionBegin;
319   switch (op) {
320   case MAT_ROW_ORIENTED:
321     a->roworiented    = PETSC_TRUE;
322     break;
323   case MAT_COLUMN_ORIENTED:
324     a->roworiented    = PETSC_FALSE;
325     break;
326   case MAT_COLUMNS_SORTED:
327     a->sorted         = PETSC_TRUE;
328     break;
329   case MAT_COLUMNS_UNSORTED:
330     a->sorted         = PETSC_FALSE;
331     break;
332   case MAT_KEEP_ZEROED_ROWS:
333     a->keepzeroedrows = PETSC_TRUE;
334     break;
335   case MAT_NO_NEW_NONZERO_LOCATIONS:
336     a->nonew          = 1;
337     break;
338   case MAT_NEW_NONZERO_LOCATION_ERR:
339     a->nonew          = -1;
340     break;
341   case MAT_NEW_NONZERO_ALLOCATION_ERR:
342     a->nonew          = -2;
343     break;
344   case MAT_YES_NEW_NONZERO_LOCATIONS:
345     a->nonew          = 0;
346     break;
347   case MAT_ROWS_SORTED:
348   case MAT_ROWS_UNSORTED:
349   case MAT_YES_NEW_DIAGONALS:
350   case MAT_IGNORE_OFF_PROC_ENTRIES:
351   case MAT_USE_HASH_TABLE:
352     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
353     break;
354   case MAT_NO_NEW_DIAGONALS:
355     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
356   default:
357     SETERRQ(PETSC_ERR_SUP,"unknown option");
358   }
359   PetscFunctionReturn(0);
360 }
361 
362 #undef __FUNCT__
363 #define __FUNCT__ "MatGetRow_SeqBAIJ"
364 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
365 {
366   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
367   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
368   MatScalar    *aa,*aa_i;
369   PetscScalar  *v_i;
370 
371   PetscFunctionBegin;
372   bs  = a->bs;
373   ai  = a->i;
374   aj  = a->j;
375   aa  = a->a;
376   bs2 = a->bs2;
377 
378   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
379 
380   bn  = row/bs;   /* Block number */
381   bp  = row % bs; /* Block Position */
382   M   = ai[bn+1] - ai[bn];
383   *nz = bs*M;
384 
385   if (v) {
386     *v = 0;
387     if (*nz) {
388       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
389       for (i=0; i<M; i++) { /* for each block in the block row */
390         v_i  = *v + i*bs;
391         aa_i = aa + bs2*(ai[bn] + i);
392         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
393       }
394     }
395   }
396 
397   if (idx) {
398     *idx = 0;
399     if (*nz) {
400       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
401       for (i=0; i<M; i++) { /* for each block in the block row */
402         idx_i = *idx + i*bs;
403         itmp  = bs*aj[ai[bn] + i];
404         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
405       }
406     }
407   }
408   PetscFunctionReturn(0);
409 }
410 
411 #undef __FUNCT__
412 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
413 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
414 {
415   int ierr;
416 
417   PetscFunctionBegin;
418   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
419   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
420   PetscFunctionReturn(0);
421 }
422 
423 #undef __FUNCT__
424 #define __FUNCT__ "MatTranspose_SeqBAIJ"
425 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
426 {
427   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
428   Mat         C;
429   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
430   int         *rows,*cols,bs2=a->bs2;
431   PetscScalar *array;
432 
433   PetscFunctionBegin;
434   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
435   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
436   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
437 
438 #if defined(PETSC_USE_MAT_SINGLE)
439   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
440   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
441 #else
442   array = a->a;
443 #endif
444 
445   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
446   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
447   ierr = PetscFree(col);CHKERRQ(ierr);
448   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
449   cols = rows + bs;
450   for (i=0; i<mbs; i++) {
451     cols[0] = i*bs;
452     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
453     len = ai[i+1] - ai[i];
454     for (j=0; j<len; j++) {
455       rows[0] = (*aj++)*bs;
456       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
457       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
458       array += bs2;
459     }
460   }
461   ierr = PetscFree(rows);CHKERRQ(ierr);
462 #if defined(PETSC_USE_MAT_SINGLE)
463   ierr = PetscFree(array);
464 #endif
465 
466   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
467   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
468 
469   if (B) {
470     *B = C;
471   } else {
472     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
473   }
474   PetscFunctionReturn(0);
475 }
476 
477 #undef __FUNCT__
478 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
479 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
480 {
481   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
482   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
483   PetscScalar *aa;
484   FILE        *file;
485 
486   PetscFunctionBegin;
487   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
488   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
489   col_lens[0] = MAT_FILE_COOKIE;
490 
491   col_lens[1] = A->m;
492   col_lens[2] = A->n;
493   col_lens[3] = a->nz*bs2;
494 
495   /* store lengths of each row and write (including header) to file */
496   count = 0;
497   for (i=0; i<a->mbs; i++) {
498     for (j=0; j<bs; j++) {
499       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
500     }
501   }
502   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
503   ierr = PetscFree(col_lens);CHKERRQ(ierr);
504 
505   /* store column indices (zero start index) */
506   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
507   count = 0;
508   for (i=0; i<a->mbs; i++) {
509     for (j=0; j<bs; j++) {
510       for (k=a->i[i]; k<a->i[i+1]; k++) {
511         for (l=0; l<bs; l++) {
512           jj[count++] = bs*a->j[k] + l;
513         }
514       }
515     }
516   }
517   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
518   ierr = PetscFree(jj);CHKERRQ(ierr);
519 
520   /* store nonzero values */
521   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
522   count = 0;
523   for (i=0; i<a->mbs; i++) {
524     for (j=0; j<bs; j++) {
525       for (k=a->i[i]; k<a->i[i+1]; k++) {
526         for (l=0; l<bs; l++) {
527           aa[count++] = a->a[bs2*k + l*bs + j];
528         }
529       }
530     }
531   }
532   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
533   ierr = PetscFree(aa);CHKERRQ(ierr);
534 
535   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
536   if (file) {
537     fprintf(file,"-matload_block_size %d\n",a->bs);
538   }
539   PetscFunctionReturn(0);
540 }
541 
542 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer);
543 
544 #undef __FUNCT__
545 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
546 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
547 {
548   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
549   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
550   PetscViewerFormat format;
551 
552   PetscFunctionBegin;
553   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
554   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
555     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
556   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
557     Mat aij;
558     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
559     ierr = MatView(aij,viewer);CHKERRQ(ierr);
560     ierr = MatDestroy(aij);CHKERRQ(ierr);
561   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
562 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
563      ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr);
564 #endif
565      PetscFunctionReturn(0);
566   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
567     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
568     for (i=0; i<a->mbs; i++) {
569       for (j=0; j<bs; j++) {
570         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
571         for (k=a->i[i]; k<a->i[i+1]; k++) {
572           for (l=0; l<bs; l++) {
573 #if defined(PETSC_USE_COMPLEX)
574             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
575               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
576                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
577             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
578               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
579                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
580             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
581               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
582             }
583 #else
584             if (a->a[bs2*k + l*bs + j] != 0.0) {
585               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
586             }
587 #endif
588           }
589         }
590         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
591       }
592     }
593     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
594   } else {
595     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
596     for (i=0; i<a->mbs; i++) {
597       for (j=0; j<bs; j++) {
598         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
599         for (k=a->i[i]; k<a->i[i+1]; k++) {
600           for (l=0; l<bs; l++) {
601 #if defined(PETSC_USE_COMPLEX)
602             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
603               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
604                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
605             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
606               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
607                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
608             } else {
609               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
610             }
611 #else
612             ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
613 #endif
614           }
615         }
616         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
617       }
618     }
619     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
620   }
621   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
622   PetscFunctionReturn(0);
623 }
624 
625 #undef __FUNCT__
626 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
627 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
628 {
629   Mat          A = (Mat) Aa;
630   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
631   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
632   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
633   MatScalar    *aa;
634   PetscViewer  viewer;
635 
636   PetscFunctionBegin;
637 
638   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
639   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
640 
641   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
642 
643   /* loop over matrix elements drawing boxes */
644   color = PETSC_DRAW_BLUE;
645   for (i=0,row=0; i<mbs; i++,row+=bs) {
646     for (j=a->i[i]; j<a->i[i+1]; j++) {
647       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
648       x_l = a->j[j]*bs; x_r = x_l + 1.0;
649       aa = a->a + j*bs2;
650       for (k=0; k<bs; k++) {
651         for (l=0; l<bs; l++) {
652           if (PetscRealPart(*aa++) >=  0.) continue;
653           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
654         }
655       }
656     }
657   }
658   color = PETSC_DRAW_CYAN;
659   for (i=0,row=0; i<mbs; i++,row+=bs) {
660     for (j=a->i[i]; j<a->i[i+1]; j++) {
661       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
662       x_l = a->j[j]*bs; x_r = x_l + 1.0;
663       aa = a->a + j*bs2;
664       for (k=0; k<bs; k++) {
665         for (l=0; l<bs; l++) {
666           if (PetscRealPart(*aa++) != 0.) continue;
667           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
668         }
669       }
670     }
671   }
672 
673   color = PETSC_DRAW_RED;
674   for (i=0,row=0; i<mbs; i++,row+=bs) {
675     for (j=a->i[i]; j<a->i[i+1]; j++) {
676       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
677       x_l = a->j[j]*bs; x_r = x_l + 1.0;
678       aa = a->a + j*bs2;
679       for (k=0; k<bs; k++) {
680         for (l=0; l<bs; l++) {
681           if (PetscRealPart(*aa++) <= 0.) continue;
682           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
683         }
684       }
685     }
686   }
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
692 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
693 {
694   int          ierr;
695   PetscReal    xl,yl,xr,yr,w,h;
696   PetscDraw    draw;
697   PetscTruth   isnull;
698 
699   PetscFunctionBegin;
700 
701   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
702   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
703 
704   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
705   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
706   xr += w;    yr += h;  xl = -w;     yl = -h;
707   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
708   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
709   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
710   PetscFunctionReturn(0);
711 }
712 
713 #undef __FUNCT__
714 #define __FUNCT__ "MatView_SeqBAIJ"
715 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
716 {
717   int        ierr;
718   PetscTruth issocket,isascii,isbinary,isdraw;
719 
720   PetscFunctionBegin;
721   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
722   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
723   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
724   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
725   if (issocket) {
726     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
727   } else if (isascii){
728     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
729   } else if (isbinary) {
730     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
731   } else if (isdraw) {
732     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
733   } else {
734     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
735   }
736   PetscFunctionReturn(0);
737 }
738 
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatGetValues_SeqBAIJ"
742 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
743 {
744   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
745   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
746   int        *ai = a->i,*ailen = a->ilen;
747   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
748   MatScalar  *ap,*aa = a->a,zero = 0.0;
749 
750   PetscFunctionBegin;
751   for (k=0; k<m; k++) { /* loop over rows */
752     row  = im[k]; brow = row/bs;
753     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
754     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
755     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
756     nrow = ailen[brow];
757     for (l=0; l<n; l++) { /* loop over columns */
758       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
759       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
760       col  = in[l] ;
761       bcol = col/bs;
762       cidx = col%bs;
763       ridx = row%bs;
764       high = nrow;
765       low  = 0; /* assume unsorted */
766       while (high-low > 5) {
767         t = (low+high)/2;
768         if (rp[t] > bcol) high = t;
769         else             low  = t;
770       }
771       for (i=low; i<high; i++) {
772         if (rp[i] > bcol) break;
773         if (rp[i] == bcol) {
774           *v++ = ap[bs2*i+bs*cidx+ridx];
775           goto finished;
776         }
777       }
778       *v++ = zero;
779       finished:;
780     }
781   }
782   PetscFunctionReturn(0);
783 }
784 
785 #if defined(PETSC_USE_MAT_SINGLE)
786 #undef __FUNCT__
787 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
788 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
789 {
790   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
791   int         ierr,i,N = m*n*b->bs2;
792   MatScalar   *vsingle;
793 
794   PetscFunctionBegin;
795   if (N > b->setvalueslen) {
796     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
797     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
798     b->setvalueslen  = N;
799   }
800   vsingle = b->setvaluescopy;
801   for (i=0; i<N; i++) {
802     vsingle[i] = v[i];
803   }
804   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
805   PetscFunctionReturn(0);
806 }
807 #endif
808 
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
812 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
813 {
814   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
815   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
816   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
817   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
818   PetscTruth  roworiented=a->roworiented;
819   MatScalar   *value = v,*ap,*aa = a->a,*bap;
820 
821   PetscFunctionBegin;
822   if (roworiented) {
823     stepval = (n-1)*bs;
824   } else {
825     stepval = (m-1)*bs;
826   }
827   for (k=0; k<m; k++) { /* loop over added rows */
828     row  = im[k];
829     if (row < 0) continue;
830 #if defined(PETSC_USE_BOPT_g)
831     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
832 #endif
833     rp   = aj + ai[row];
834     ap   = aa + bs2*ai[row];
835     rmax = imax[row];
836     nrow = ailen[row];
837     low  = 0;
838     for (l=0; l<n; l++) { /* loop over added columns */
839       if (in[l] < 0) continue;
840 #if defined(PETSC_USE_BOPT_g)
841       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
842 #endif
843       col = in[l];
844       if (roworiented) {
845         value = v + k*(stepval+bs)*bs + l*bs;
846       } else {
847         value = v + l*(stepval+bs)*bs + k*bs;
848       }
849       if (!sorted) low = 0; high = nrow;
850       while (high-low > 7) {
851         t = (low+high)/2;
852         if (rp[t] > col) high = t;
853         else             low  = t;
854       }
855       for (i=low; i<high; i++) {
856         if (rp[i] > col) break;
857         if (rp[i] == col) {
858           bap  = ap +  bs2*i;
859           if (roworiented) {
860             if (is == ADD_VALUES) {
861               for (ii=0; ii<bs; ii++,value+=stepval) {
862                 for (jj=ii; jj<bs2; jj+=bs) {
863                   bap[jj] += *value++;
864                 }
865               }
866             } else {
867               for (ii=0; ii<bs; ii++,value+=stepval) {
868                 for (jj=ii; jj<bs2; jj+=bs) {
869                   bap[jj] = *value++;
870                 }
871               }
872             }
873           } else {
874             if (is == ADD_VALUES) {
875               for (ii=0; ii<bs; ii++,value+=stepval) {
876                 for (jj=0; jj<bs; jj++) {
877                   *bap++ += *value++;
878                 }
879               }
880             } else {
881               for (ii=0; ii<bs; ii++,value+=stepval) {
882                 for (jj=0; jj<bs; jj++) {
883                   *bap++  = *value++;
884                 }
885               }
886             }
887           }
888           goto noinsert2;
889         }
890       }
891       if (nonew == 1) goto noinsert2;
892       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
893       if (nrow >= rmax) {
894         /* there is no extra room in row, therefore enlarge */
895         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
896         MatScalar *new_a;
897 
898         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
899 
900         /* malloc new storage space */
901         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
902 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
903         new_j   = (int*)(new_a + bs2*new_nz);
904         new_i   = new_j + new_nz;
905 
906         /* copy over old data into new slots */
907         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
908         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
909         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
910         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
911         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
912         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
913         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
914         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
915         /* free up old matrix storage */
916         ierr = PetscFree(a->a);CHKERRQ(ierr);
917         if (!a->singlemalloc) {
918           ierr = PetscFree(a->i);CHKERRQ(ierr);
919           ierr = PetscFree(a->j);CHKERRQ(ierr);
920         }
921         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
922         a->singlemalloc = PETSC_TRUE;
923 
924         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
925         rmax = imax[row] = imax[row] + CHUNKSIZE;
926         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
927         a->maxnz += bs2*CHUNKSIZE;
928         a->reallocs++;
929         a->nz++;
930       }
931       N = nrow++ - 1;
932       /* shift up all the later entries in this row */
933       for (ii=N; ii>=i; ii--) {
934         rp[ii+1] = rp[ii];
935         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
936       }
937       if (N >= i) {
938         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
939       }
940       rp[i] = col;
941       bap   = ap +  bs2*i;
942       if (roworiented) {
943         for (ii=0; ii<bs; ii++,value+=stepval) {
944           for (jj=ii; jj<bs2; jj+=bs) {
945             bap[jj] = *value++;
946           }
947         }
948       } else {
949         for (ii=0; ii<bs; ii++,value+=stepval) {
950           for (jj=0; jj<bs; jj++) {
951             *bap++  = *value++;
952           }
953         }
954       }
955       noinsert2:;
956       low = i;
957     }
958     ailen[row] = nrow;
959   }
960   PetscFunctionReturn(0);
961 }
962 
963 #undef __FUNCT__
964 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
965 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
966 {
967   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
968   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
969   int        m = A->m,*ip,N,*ailen = a->ilen;
970   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
971   MatScalar  *aa = a->a,*ap;
972 #if defined(PETSC_HAVE_DSCPACK)
973   PetscTruth   flag;
974 #endif
975 
976   PetscFunctionBegin;
977   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
978 
979   if (m) rmax = ailen[0];
980   for (i=1; i<mbs; i++) {
981     /* move each row back by the amount of empty slots (fshift) before it*/
982     fshift += imax[i-1] - ailen[i-1];
983     rmax   = PetscMax(rmax,ailen[i]);
984     if (fshift) {
985       ip = aj + ai[i]; ap = aa + bs2*ai[i];
986       N = ailen[i];
987       for (j=0; j<N; j++) {
988         ip[j-fshift] = ip[j];
989         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
990       }
991     }
992     ai[i] = ai[i-1] + ailen[i-1];
993   }
994   if (mbs) {
995     fshift += imax[mbs-1] - ailen[mbs-1];
996     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
997   }
998   /* reset ilen and imax for each row */
999   for (i=0; i<mbs; i++) {
1000     ailen[i] = imax[i] = ai[i+1] - ai[i];
1001   }
1002   a->nz = ai[mbs];
1003 
1004   /* diagonals may have moved, so kill the diagonal pointers */
1005   if (fshift && a->diag) {
1006     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1007     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
1008     a->diag = 0;
1009   }
1010   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1011   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
1012   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
1013   a->reallocs          = 0;
1014   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1015 
1016 #if defined(PETSC_HAVE_DSCPACK)
1017   ierr = PetscOptionsHasName(A->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr);
1018   if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); }
1019 #endif
1020 
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 
1025 
1026 /*
1027    This function returns an array of flags which indicate the locations of contiguous
1028    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1029    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1030    Assume: sizes should be long enough to hold all the values.
1031 */
1032 #undef __FUNCT__
1033 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1034 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1035 {
1036   int        i,j,k,row;
1037   PetscTruth flg;
1038 
1039   PetscFunctionBegin;
1040   for (i=0,j=0; i<n; j++) {
1041     row = idx[i];
1042     if (row%bs!=0) { /* Not the begining of a block */
1043       sizes[j] = 1;
1044       i++;
1045     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1046       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1047       i++;
1048     } else { /* Begining of the block, so check if the complete block exists */
1049       flg = PETSC_TRUE;
1050       for (k=1; k<bs; k++) {
1051         if (row+k != idx[i+k]) { /* break in the block */
1052           flg = PETSC_FALSE;
1053           break;
1054         }
1055       }
1056       if (flg == PETSC_TRUE) { /* No break in the bs */
1057         sizes[j] = bs;
1058         i+= bs;
1059       } else {
1060         sizes[j] = 1;
1061         i++;
1062       }
1063     }
1064   }
1065   *bs_max = j;
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1071 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag)
1072 {
1073   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1074   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
1075   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1076   PetscScalar zero = 0.0;
1077   MatScalar   *aa;
1078 
1079   PetscFunctionBegin;
1080   /* Make a copy of the IS and  sort it */
1081   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1082   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1083 
1084   /* allocate memory for rows,sizes */
1085   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
1086   sizes = rows + is_n;
1087 
1088   /* copy IS values to rows, and sort them */
1089   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1090   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1091   if (baij->keepzeroedrows) {
1092     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1093     bs_max = is_n;
1094   } else {
1095     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1096   }
1097   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1098 
1099   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1100     row   = rows[j];
1101     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1102     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1103     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1104     if (sizes[i] == bs && !baij->keepzeroedrows) {
1105       if (diag) {
1106         if (baij->ilen[row/bs] > 0) {
1107           baij->ilen[row/bs]       = 1;
1108           baij->j[baij->i[row/bs]] = row/bs;
1109           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1110         }
1111         /* Now insert all the diagonal values for this bs */
1112         for (k=0; k<bs; k++) {
1113           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1114         }
1115       } else { /* (!diag) */
1116         baij->ilen[row/bs] = 0;
1117       } /* end (!diag) */
1118     } else { /* (sizes[i] != bs) */
1119 #if defined (PETSC_USE_DEBUG)
1120       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1121 #endif
1122       for (k=0; k<count; k++) {
1123         aa[0] =  zero;
1124         aa    += bs;
1125       }
1126       if (diag) {
1127         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1128       }
1129     }
1130   }
1131 
1132   ierr = PetscFree(rows);CHKERRQ(ierr);
1133   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1139 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
1140 {
1141   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1142   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1143   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1144   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1145   int         ridx,cidx,bs2=a->bs2,ierr;
1146   PetscTruth  roworiented=a->roworiented;
1147   MatScalar   *ap,value,*aa=a->a,*bap;
1148 
1149   PetscFunctionBegin;
1150   for (k=0; k<m; k++) { /* loop over added rows */
1151     row  = im[k]; brow = row/bs;
1152     if (row < 0) continue;
1153 #if defined(PETSC_USE_BOPT_g)
1154     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1155 #endif
1156     rp   = aj + ai[brow];
1157     ap   = aa + bs2*ai[brow];
1158     rmax = imax[brow];
1159     nrow = ailen[brow];
1160     low  = 0;
1161     for (l=0; l<n; l++) { /* loop over added columns */
1162       if (in[l] < 0) continue;
1163 #if defined(PETSC_USE_BOPT_g)
1164       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1165 #endif
1166       col = in[l]; bcol = col/bs;
1167       ridx = row % bs; cidx = col % bs;
1168       if (roworiented) {
1169         value = v[l + k*n];
1170       } else {
1171         value = v[k + l*m];
1172       }
1173       if (!sorted) low = 0; high = nrow;
1174       while (high-low > 7) {
1175         t = (low+high)/2;
1176         if (rp[t] > bcol) high = t;
1177         else              low  = t;
1178       }
1179       for (i=low; i<high; i++) {
1180         if (rp[i] > bcol) break;
1181         if (rp[i] == bcol) {
1182           bap  = ap +  bs2*i + bs*cidx + ridx;
1183           if (is == ADD_VALUES) *bap += value;
1184           else                  *bap  = value;
1185           goto noinsert1;
1186         }
1187       }
1188       if (nonew == 1) goto noinsert1;
1189       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1190       if (nrow >= rmax) {
1191         /* there is no extra room in row, therefore enlarge */
1192         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1193         MatScalar *new_a;
1194 
1195         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1196 
1197         /* Malloc new storage space */
1198         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1199 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1200         new_j   = (int*)(new_a + bs2*new_nz);
1201         new_i   = new_j + new_nz;
1202 
1203         /* copy over old data into new slots */
1204         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1205         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1206         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1207         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1208         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1209         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1210         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1211         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1212         /* free up old matrix storage */
1213         ierr = PetscFree(a->a);CHKERRQ(ierr);
1214         if (!a->singlemalloc) {
1215           ierr = PetscFree(a->i);CHKERRQ(ierr);
1216           ierr = PetscFree(a->j);CHKERRQ(ierr);
1217         }
1218         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1219         a->singlemalloc = PETSC_TRUE;
1220 
1221         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1222         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1223         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1224         a->maxnz += bs2*CHUNKSIZE;
1225         a->reallocs++;
1226         a->nz++;
1227       }
1228       N = nrow++ - 1;
1229       /* shift up all the later entries in this row */
1230       for (ii=N; ii>=i; ii--) {
1231         rp[ii+1] = rp[ii];
1232         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1233       }
1234       if (N>=i) {
1235         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1236       }
1237       rp[i]                      = bcol;
1238       ap[bs2*i + bs*cidx + ridx] = value;
1239       noinsert1:;
1240       low = i;
1241     }
1242     ailen[brow] = nrow;
1243   }
1244   PetscFunctionReturn(0);
1245 }
1246 
1247 
1248 #undef __FUNCT__
1249 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1250 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1251 {
1252   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1253   Mat         outA;
1254   int         ierr;
1255   PetscTruth  row_identity,col_identity;
1256 
1257   PetscFunctionBegin;
1258   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1259   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1260   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1261   if (!row_identity || !col_identity) {
1262     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1263   }
1264 
1265   outA          = inA;
1266   inA->factor   = FACTOR_LU;
1267 
1268   if (!a->diag) {
1269     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1270   }
1271 
1272   a->row        = row;
1273   a->col        = col;
1274   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1275   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1276 
1277   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1278   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1279   PetscLogObjectParent(inA,a->icol);
1280 
1281   /*
1282       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1283       for ILU(0) factorization with natural ordering
1284   */
1285   if (a->bs < 8) {
1286     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1287   } else {
1288     if (!a->solve_work) {
1289       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1290       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1291     }
1292   }
1293 
1294   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1295 
1296   PetscFunctionReturn(0);
1297 }
1298 #undef __FUNCT__
1299 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1300 int MatPrintHelp_SeqBAIJ(Mat A)
1301 {
1302   static PetscTruth called = PETSC_FALSE;
1303   MPI_Comm          comm = A->comm;
1304   int               ierr;
1305 
1306   PetscFunctionBegin;
1307   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1308   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1309   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 EXTERN_C_BEGIN
1314 #undef __FUNCT__
1315 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1316 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1317 {
1318   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1319   int         i,nz,nbs;
1320 
1321   PetscFunctionBegin;
1322   nz  = baij->maxnz/baij->bs2;
1323   nbs = baij->nbs;
1324   for (i=0; i<nz; i++) {
1325     baij->j[i] = indices[i];
1326   }
1327   baij->nz = nz;
1328   for (i=0; i<nbs; i++) {
1329     baij->ilen[i] = baij->imax[i];
1330   }
1331 
1332   PetscFunctionReturn(0);
1333 }
1334 EXTERN_C_END
1335 
1336 #undef __FUNCT__
1337 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1338 /*@
1339     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1340        in the matrix.
1341 
1342   Input Parameters:
1343 +  mat - the SeqBAIJ matrix
1344 -  indices - the column indices
1345 
1346   Level: advanced
1347 
1348   Notes:
1349     This can be called if you have precomputed the nonzero structure of the
1350   matrix and want to provide it to the matrix object to improve the performance
1351   of the MatSetValues() operation.
1352 
1353     You MUST have set the correct numbers of nonzeros per row in the call to
1354   MatCreateSeqBAIJ().
1355 
1356     MUST be called before any calls to MatSetValues();
1357 
1358 @*/
1359 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1360 {
1361   int ierr,(*f)(Mat,int *);
1362 
1363   PetscFunctionBegin;
1364   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1365   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1366   if (f) {
1367     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1368   } else {
1369     SETERRQ(1,"Wrong type of matrix to set column indices");
1370   }
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1376 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1377 {
1378   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1379   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1380   PetscReal    atmp;
1381   PetscScalar  *x,zero = 0.0;
1382   MatScalar    *aa;
1383   int          ncols,brow,krow,kcol;
1384 
1385   PetscFunctionBegin;
1386   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1387   bs   = a->bs;
1388   aa   = a->a;
1389   ai   = a->i;
1390   aj   = a->j;
1391   mbs = a->mbs;
1392 
1393   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1394   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1395   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1396   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1397   for (i=0; i<mbs; i++) {
1398     ncols = ai[1] - ai[0]; ai++;
1399     brow  = bs*i;
1400     for (j=0; j<ncols; j++){
1401       /* bcol = bs*(*aj); */
1402       for (kcol=0; kcol<bs; kcol++){
1403         for (krow=0; krow<bs; krow++){
1404           atmp = PetscAbsScalar(*aa); aa++;
1405           row = brow + krow;    /* row index */
1406           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1407           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1408         }
1409       }
1410       aj++;
1411     }
1412   }
1413   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 #undef __FUNCT__
1418 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1419 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1420 {
1421   int        ierr;
1422 
1423   PetscFunctionBegin;
1424   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1430 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array)
1431 {
1432   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1433   PetscFunctionBegin;
1434   *array = a->a;
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 #undef __FUNCT__
1439 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1440 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array)
1441 {
1442   PetscFunctionBegin;
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #include "petscblaslapack.h"
1447 #undef __FUNCT__
1448 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1449 int MatAXPY_SeqBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1450 {
1451   Mat_SeqBAIJ  *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1452   int          ierr,one=1,i,bs=y->bs,j,bs2;
1453 
1454   PetscFunctionBegin;
1455   if (str == SAME_NONZERO_PATTERN) {
1456     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1457   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1458     if (y->xtoy && y->XtoY != X) {
1459       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1460       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1461     }
1462     if (!y->xtoy) { /* get xtoy */
1463       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1464       y->XtoY = X;
1465     }
1466     bs2 = bs*bs;
1467     for (i=0; i<x->nz; i++) {
1468       j = 0;
1469       while (j < bs2){
1470         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1471         j++;
1472       }
1473     }
1474     PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1475   } else {
1476     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1477   }
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 /* -------------------------------------------------------------------*/
1482 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1483        MatGetRow_SeqBAIJ,
1484        MatRestoreRow_SeqBAIJ,
1485        MatMult_SeqBAIJ_N,
1486        MatMultAdd_SeqBAIJ_N,
1487        MatMultTranspose_SeqBAIJ,
1488        MatMultTransposeAdd_SeqBAIJ,
1489        MatSolve_SeqBAIJ_N,
1490        0,
1491        0,
1492        0,
1493        MatLUFactor_SeqBAIJ,
1494        0,
1495        0,
1496        MatTranspose_SeqBAIJ,
1497        MatGetInfo_SeqBAIJ,
1498        MatEqual_SeqBAIJ,
1499        MatGetDiagonal_SeqBAIJ,
1500        MatDiagonalScale_SeqBAIJ,
1501        MatNorm_SeqBAIJ,
1502        0,
1503        MatAssemblyEnd_SeqBAIJ,
1504        0,
1505        MatSetOption_SeqBAIJ,
1506        MatZeroEntries_SeqBAIJ,
1507        MatZeroRows_SeqBAIJ,
1508        MatLUFactorSymbolic_SeqBAIJ,
1509        MatLUFactorNumeric_SeqBAIJ_N,
1510        0,
1511        0,
1512        MatSetUpPreallocation_SeqBAIJ,
1513        MatILUFactorSymbolic_SeqBAIJ,
1514        0,
1515        MatGetArray_SeqBAIJ,
1516        MatRestoreArray_SeqBAIJ,
1517        MatDuplicate_SeqBAIJ,
1518        0,
1519        0,
1520        MatILUFactor_SeqBAIJ,
1521        0,
1522        MatAXPY_SeqBAIJ,
1523        MatGetSubMatrices_SeqBAIJ,
1524        MatIncreaseOverlap_SeqBAIJ,
1525        MatGetValues_SeqBAIJ,
1526        0,
1527        MatPrintHelp_SeqBAIJ,
1528        MatScale_SeqBAIJ,
1529        0,
1530        0,
1531        0,
1532        MatGetBlockSize_SeqBAIJ,
1533        MatGetRowIJ_SeqBAIJ,
1534        MatRestoreRowIJ_SeqBAIJ,
1535        0,
1536        0,
1537        0,
1538        0,
1539        0,
1540        0,
1541        MatSetValuesBlocked_SeqBAIJ,
1542        MatGetSubMatrix_SeqBAIJ,
1543        MatDestroy_SeqBAIJ,
1544        MatView_SeqBAIJ,
1545        MatGetPetscMaps_Petsc,
1546        0,
1547        0,
1548        0,
1549        0,
1550        0,
1551        0,
1552        MatGetRowMax_SeqBAIJ,
1553        MatConvert_Basic};
1554 
1555 EXTERN_C_BEGIN
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1558 int MatStoreValues_SeqBAIJ(Mat mat)
1559 {
1560   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1561   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1562   int         ierr;
1563 
1564   PetscFunctionBegin;
1565   if (aij->nonew != 1) {
1566     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1567   }
1568 
1569   /* allocate space for values if not already there */
1570   if (!aij->saved_values) {
1571     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1572   }
1573 
1574   /* copy values over */
1575   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1576   PetscFunctionReturn(0);
1577 }
1578 EXTERN_C_END
1579 
1580 EXTERN_C_BEGIN
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1583 int MatRetrieveValues_SeqBAIJ(Mat mat)
1584 {
1585   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1586   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1587 
1588   PetscFunctionBegin;
1589   if (aij->nonew != 1) {
1590     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1591   }
1592   if (!aij->saved_values) {
1593     SETERRQ(1,"Must call MatStoreValues(A);first");
1594   }
1595 
1596   /* copy values over */
1597   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1598   PetscFunctionReturn(0);
1599 }
1600 EXTERN_C_END
1601 
1602 EXTERN_C_BEGIN
1603 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1604 EXTERN_C_END
1605 
1606 EXTERN_C_BEGIN
1607 #undef __FUNCT__
1608 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
1609 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz)
1610 {
1611   Mat_SeqBAIJ *b;
1612   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1613   PetscTruth  flg;
1614 
1615   PetscFunctionBegin;
1616 
1617   B->preallocated = PETSC_TRUE;
1618   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1619   if (nnz && newbs != bs) {
1620     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1621   }
1622   bs   = newbs;
1623 
1624   mbs  = B->m/bs;
1625   nbs  = B->n/bs;
1626   bs2  = bs*bs;
1627 
1628   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1629     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1630   }
1631 
1632   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1633   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1634   if (nnz) {
1635     for (i=0; i<mbs; i++) {
1636       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1637       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1638     }
1639   }
1640 
1641   b       = (Mat_SeqBAIJ*)B->data;
1642   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1643   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1644   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1645   if (!flg) {
1646     switch (bs) {
1647     case 1:
1648       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1649       B->ops->mult            = MatMult_SeqBAIJ_1;
1650       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1651       break;
1652     case 2:
1653       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1654       B->ops->mult            = MatMult_SeqBAIJ_2;
1655       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1656       break;
1657     case 3:
1658       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1659       B->ops->mult            = MatMult_SeqBAIJ_3;
1660       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1661       break;
1662     case 4:
1663       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1664       B->ops->mult            = MatMult_SeqBAIJ_4;
1665       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1666       break;
1667     case 5:
1668       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1669       B->ops->mult            = MatMult_SeqBAIJ_5;
1670       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1671       break;
1672     case 6:
1673       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1674       B->ops->mult            = MatMult_SeqBAIJ_6;
1675       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1676       break;
1677     case 7:
1678       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1679       B->ops->mult            = MatMult_SeqBAIJ_7;
1680       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1681       break;
1682     default:
1683       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1684       B->ops->mult            = MatMult_SeqBAIJ_N;
1685       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1686       break;
1687     }
1688   }
1689   b->bs      = bs;
1690   b->mbs     = mbs;
1691   b->nbs     = nbs;
1692   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1693   if (!nnz) {
1694     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1695     else if (nz <= 0)        nz = 1;
1696     for (i=0; i<mbs; i++) b->imax[i] = nz;
1697     nz = nz*mbs;
1698   } else {
1699     nz = 0;
1700     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1701   }
1702 
1703   /* allocate the matrix space */
1704   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1705   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1706   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1707   b->j  = (int*)(b->a + nz*bs2);
1708   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1709   b->i  = b->j + nz;
1710   b->singlemalloc = PETSC_TRUE;
1711 
1712   b->i[0] = 0;
1713   for (i=1; i<mbs+1; i++) {
1714     b->i[i] = b->i[i-1] + b->imax[i-1];
1715   }
1716 
1717   /* b->ilen will count nonzeros in each block row so far. */
1718   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1719   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1720   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1721 
1722   b->bs               = bs;
1723   b->bs2              = bs2;
1724   b->mbs              = mbs;
1725   b->nz               = 0;
1726   b->maxnz            = nz*bs2;
1727   B->info.nz_unneeded = (PetscReal)b->maxnz;
1728   PetscFunctionReturn(0);
1729 }
1730 EXTERN_C_END
1731 
1732 EXTERN_C_BEGIN
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatCreate_SeqBAIJ"
1735 int MatCreate_SeqBAIJ(Mat B)
1736 {
1737   int         ierr,size;
1738   Mat_SeqBAIJ *b;
1739 
1740   PetscFunctionBegin;
1741   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1742   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1743 
1744   B->m = B->M = PetscMax(B->m,B->M);
1745   B->n = B->N = PetscMax(B->n,B->N);
1746   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1747   B->data = (void*)b;
1748   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1749   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1750   B->factor           = 0;
1751   B->lupivotthreshold = 1.0;
1752   B->mapping          = 0;
1753   b->row              = 0;
1754   b->col              = 0;
1755   b->icol             = 0;
1756   b->reallocs         = 0;
1757   b->saved_values     = 0;
1758 #if defined(PETSC_USE_MAT_SINGLE)
1759   b->setvalueslen     = 0;
1760   b->setvaluescopy    = PETSC_NULL;
1761 #endif
1762 
1763   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1764   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1765 
1766   b->sorted           = PETSC_FALSE;
1767   b->roworiented      = PETSC_TRUE;
1768   b->nonew            = 0;
1769   b->diag             = 0;
1770   b->solve_work       = 0;
1771   b->mult_work        = 0;
1772   B->spptr            = 0;
1773   B->info.nz_unneeded = (PetscReal)b->maxnz;
1774   b->keepzeroedrows   = PETSC_FALSE;
1775   b->xtoy              = 0;
1776   b->XtoY              = 0;
1777 
1778   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1779                                      "MatStoreValues_SeqBAIJ",
1780                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1781   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1782                                      "MatRetrieveValues_SeqBAIJ",
1783                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1784   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1785                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1786                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1787   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
1788                                      "MatConvert_SeqBAIJ_SeqAIJ",
1789                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1790   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
1791                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
1792                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
1793   PetscFunctionReturn(0);
1794 }
1795 EXTERN_C_END
1796 
1797 #undef __FUNCT__
1798 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1799 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1800 {
1801   Mat         C;
1802   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1803   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1804 
1805   PetscFunctionBegin;
1806   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1807 
1808   *B = 0;
1809   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1810   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1811   c    = (Mat_SeqBAIJ*)C->data;
1812 
1813   c->bs         = a->bs;
1814   c->bs2        = a->bs2;
1815   c->mbs        = a->mbs;
1816   c->nbs        = a->nbs;
1817   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1818 
1819   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1820   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1821   for (i=0; i<mbs; i++) {
1822     c->imax[i] = a->imax[i];
1823     c->ilen[i] = a->ilen[i];
1824   }
1825 
1826   /* allocate the matrix space */
1827   c->singlemalloc = PETSC_TRUE;
1828   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1829   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1830   c->j = (int*)(c->a + nz*bs2);
1831   c->i = c->j + nz;
1832   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1833   if (mbs > 0) {
1834     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1835     if (cpvalues == MAT_COPY_VALUES) {
1836       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1837     } else {
1838       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1839     }
1840   }
1841 
1842   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1843   c->sorted      = a->sorted;
1844   c->roworiented = a->roworiented;
1845   c->nonew       = a->nonew;
1846 
1847   if (a->diag) {
1848     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1849     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1850     for (i=0; i<mbs; i++) {
1851       c->diag[i] = a->diag[i];
1852     }
1853   } else c->diag        = 0;
1854   c->nz                 = a->nz;
1855   c->maxnz              = a->maxnz;
1856   c->solve_work         = 0;
1857   C->spptr              = 0;     /* Dangerous -I'm throwing away a->spptr */
1858   c->mult_work          = 0;
1859   C->preallocated       = PETSC_TRUE;
1860   C->assembled          = PETSC_TRUE;
1861   *B = C;
1862   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1863   PetscFunctionReturn(0);
1864 }
1865 
1866 EXTERN_C_BEGIN
1867 #undef __FUNCT__
1868 #define __FUNCT__ "MatLoad_SeqBAIJ"
1869 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1870 {
1871   Mat_SeqBAIJ  *a;
1872   Mat          B;
1873   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1874   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1875   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1876   int          *masked,nmask,tmp,bs2,ishift;
1877   PetscScalar  *aa;
1878   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1879 #if defined(PETSC_HAVE_DSCPACK)
1880   PetscTruth   flag;
1881 #endif
1882 
1883   PetscFunctionBegin;
1884   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1885   bs2  = bs*bs;
1886 
1887   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1888   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1889   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1890   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1891   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1892   M = header[1]; N = header[2]; nz = header[3];
1893 
1894   if (header[3] < 0) {
1895     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1896   }
1897 
1898   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1899 
1900   /*
1901      This code adds extra rows to make sure the number of rows is
1902     divisible by the blocksize
1903   */
1904   mbs        = M/bs;
1905   extra_rows = bs - M + bs*(mbs);
1906   if (extra_rows == bs) extra_rows = 0;
1907   else                  mbs++;
1908   if (extra_rows) {
1909     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1910   }
1911 
1912   /* read in row lengths */
1913   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1914   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1915   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1916 
1917   /* read in column indices */
1918   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1919   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1920   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1921 
1922   /* loop over row lengths determining block row lengths */
1923   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1924   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1925   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1926   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1927   masked   = mask + mbs;
1928   rowcount = 0; nzcount = 0;
1929   for (i=0; i<mbs; i++) {
1930     nmask = 0;
1931     for (j=0; j<bs; j++) {
1932       kmax = rowlengths[rowcount];
1933       for (k=0; k<kmax; k++) {
1934         tmp = jj[nzcount++]/bs;
1935         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1936       }
1937       rowcount++;
1938     }
1939     browlengths[i] += nmask;
1940     /* zero out the mask elements we set */
1941     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1942   }
1943 
1944   /* create our matrix */
1945   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1946   B = *A;
1947   a = (Mat_SeqBAIJ*)B->data;
1948 
1949   /* set matrix "i" values */
1950   a->i[0] = 0;
1951   for (i=1; i<= mbs; i++) {
1952     a->i[i]      = a->i[i-1] + browlengths[i-1];
1953     a->ilen[i-1] = browlengths[i-1];
1954   }
1955   a->nz         = 0;
1956   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1957 
1958   /* read in nonzero values */
1959   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1960   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1961   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1962 
1963   /* set "a" and "j" values into matrix */
1964   nzcount = 0; jcount = 0;
1965   for (i=0; i<mbs; i++) {
1966     nzcountb = nzcount;
1967     nmask    = 0;
1968     for (j=0; j<bs; j++) {
1969       kmax = rowlengths[i*bs+j];
1970       for (k=0; k<kmax; k++) {
1971         tmp = jj[nzcount++]/bs;
1972 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1973       }
1974     }
1975     /* sort the masked values */
1976     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1977 
1978     /* set "j" values into matrix */
1979     maskcount = 1;
1980     for (j=0; j<nmask; j++) {
1981       a->j[jcount++]  = masked[j];
1982       mask[masked[j]] = maskcount++;
1983     }
1984     /* set "a" values into matrix */
1985     ishift = bs2*a->i[i];
1986     for (j=0; j<bs; j++) {
1987       kmax = rowlengths[i*bs+j];
1988       for (k=0; k<kmax; k++) {
1989         tmp       = jj[nzcountb]/bs ;
1990         block     = mask[tmp] - 1;
1991         point     = jj[nzcountb] - bs*tmp;
1992         idx       = ishift + bs2*block + j + bs*point;
1993         a->a[idx] = (MatScalar)aa[nzcountb++];
1994       }
1995     }
1996     /* zero out the mask elements we set */
1997     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1998   }
1999   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2000 
2001   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2002   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2003   ierr = PetscFree(aa);CHKERRQ(ierr);
2004   ierr = PetscFree(jj);CHKERRQ(ierr);
2005   ierr = PetscFree(mask);CHKERRQ(ierr);
2006 
2007   B->assembled = PETSC_TRUE;
2008 
2009 #if defined(PETSC_HAVE_DSCPACK)
2010   ierr = PetscOptionsHasName(B->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr);
2011   if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(B);CHKERRQ(ierr); }
2012 #endif
2013 
2014   ierr = MatView_Private(B);CHKERRQ(ierr);
2015   PetscFunctionReturn(0);
2016 }
2017 EXTERN_C_END
2018 
2019 #undef __FUNCT__
2020 #define __FUNCT__ "MatCreateSeqBAIJ"
2021 /*@C
2022    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2023    compressed row) format.  For good matrix assembly performance the
2024    user should preallocate the matrix storage by setting the parameter nz
2025    (or the array nnz).  By setting these parameters accurately, performance
2026    during matrix assembly can be increased by more than a factor of 50.
2027 
2028    Collective on MPI_Comm
2029 
2030    Input Parameters:
2031 +  comm - MPI communicator, set to PETSC_COMM_SELF
2032 .  bs - size of block
2033 .  m - number of rows
2034 .  n - number of columns
2035 .  nz - number of nonzero blocks  per block row (same for all rows)
2036 -  nnz - array containing the number of nonzero blocks in the various block rows
2037          (possibly different for each block row) or PETSC_NULL
2038 
2039    Output Parameter:
2040 .  A - the matrix
2041 
2042    Options Database Keys:
2043 .   -mat_no_unroll - uses code that does not unroll the loops in the
2044                      block calculations (much slower)
2045 .    -mat_block_size - size of the blocks to use
2046 
2047    Level: intermediate
2048 
2049    Notes:
2050    A nonzero block is any block that as 1 or more nonzeros in it
2051 
2052    The block AIJ format is fully compatible with standard Fortran 77
2053    storage.  That is, the stored row and column indices can begin at
2054    either one (as in Fortran) or zero.  See the users' manual for details.
2055 
2056    Specify the preallocated storage with either nz or nnz (not both).
2057    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2058    allocation.  For additional details, see the users manual chapter on
2059    matrices.
2060 
2061 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2062 @*/
2063 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
2064 {
2065   int ierr;
2066 
2067   PetscFunctionBegin;
2068   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2069   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2070   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 #undef __FUNCT__
2075 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2076 /*@C
2077    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2078    per row in the matrix. For good matrix assembly performance the
2079    user should preallocate the matrix storage by setting the parameter nz
2080    (or the array nnz).  By setting these parameters accurately, performance
2081    during matrix assembly can be increased by more than a factor of 50.
2082 
2083    Collective on MPI_Comm
2084 
2085    Input Parameters:
2086 +  A - the matrix
2087 .  bs - size of block
2088 .  nz - number of block nonzeros per block row (same for all rows)
2089 -  nnz - array containing the number of block nonzeros in the various block rows
2090          (possibly different for each block row) or PETSC_NULL
2091 
2092    Options Database Keys:
2093 .   -mat_no_unroll - uses code that does not unroll the loops in the
2094                      block calculations (much slower)
2095 .    -mat_block_size - size of the blocks to use
2096 
2097    Level: intermediate
2098 
2099    Notes:
2100    The block AIJ format is fully compatible with standard Fortran 77
2101    storage.  That is, the stored row and column indices can begin at
2102    either one (as in Fortran) or zero.  See the users' manual for details.
2103 
2104    Specify the preallocated storage with either nz or nnz (not both).
2105    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2106    allocation.  For additional details, see the users manual chapter on
2107    matrices.
2108 
2109 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2110 @*/
2111 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
2112 {
2113   int ierr,(*f)(Mat,int,int,int*);
2114 
2115   PetscFunctionBegin;
2116   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2117   if (f) {
2118     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2119   }
2120   PetscFunctionReturn(0);
2121 }
2122