xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 5ed2d596fd9914de76abbaed56c65f4792ae57ae)
1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "petscsys.h"                     /*I "petscmat.h" I*/
11 
12 /*  UGLY, ugly, ugly
13    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
14    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
15    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
16    converts the entries into single precision and then calls ..._MatScalar() to put them
17    into the single precision data structures.
18 */
19 #if defined(PETSC_USE_MAT_SINGLE)
20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
21 #else
22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
23 #endif
24 #if defined(PETSC_HAVE_DSCPACK)
25 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat);
26 #endif
27 
28 #define CHUNKSIZE  10
29 
30 /*
31      Checks for missing diagonals
32 */
33 #undef __FUNCT__
34 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
35 int MatMissingDiagonal_SeqBAIJ(Mat A)
36 {
37   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
38   int         *diag,*jj = a->j,i,ierr;
39 
40   PetscFunctionBegin;
41   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
42   diag = a->diag;
43   for (i=0; i<a->mbs; i++) {
44     if (jj[diag[i]] != i) {
45       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
46     }
47   }
48   PetscFunctionReturn(0);
49 }
50 
51 #undef __FUNCT__
52 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
53 int MatMarkDiagonal_SeqBAIJ(Mat A)
54 {
55   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
56   int         i,j,*diag,m = a->mbs,ierr;
57 
58   PetscFunctionBegin;
59   if (a->diag) PetscFunctionReturn(0);
60 
61   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
62   PetscLogObjectMemory(A,(m+1)*sizeof(int));
63   for (i=0; i<m; i++) {
64     diag[i] = a->i[i+1];
65     for (j=a->i[i]; j<a->i[i+1]; j++) {
66       if (a->j[j] == i) {
67         diag[i] = j;
68         break;
69       }
70     }
71   }
72   a->diag = diag;
73   PetscFunctionReturn(0);
74 }
75 
76 
77 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
78 
79 #undef __FUNCT__
80 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
81 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
82 {
83   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
84   int         ierr,n = a->mbs,i;
85 
86   PetscFunctionBegin;
87   *nn = n;
88   if (!ia) PetscFunctionReturn(0);
89   if (symmetric) {
90     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
91   } else if (oshift == 1) {
92     /* temporarily add 1 to i and j indices */
93     int nz = a->i[n];
94     for (i=0; i<nz; i++) a->j[i]++;
95     for (i=0; i<n+1; i++) a->i[i]++;
96     *ia = a->i; *ja = a->j;
97   } else {
98     *ia = a->i; *ja = a->j;
99   }
100 
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNCT__
105 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
106 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
107 {
108   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
109   int         i,n = a->mbs,ierr;
110 
111   PetscFunctionBegin;
112   if (!ia) PetscFunctionReturn(0);
113   if (symmetric) {
114     ierr = PetscFree(*ia);CHKERRQ(ierr);
115     ierr = PetscFree(*ja);CHKERRQ(ierr);
116   } else if (oshift == 1) {
117     int nz = a->i[n]-1;
118     for (i=0; i<nz; i++) a->j[i]--;
119     for (i=0; i<n+1; i++) a->i[i]--;
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
126 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
127 {
128   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
129 
130   PetscFunctionBegin;
131   *bs = baij->bs;
132   PetscFunctionReturn(0);
133 }
134 
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "MatDestroy_SeqBAIJ"
138 int MatDestroy_SeqBAIJ(Mat A)
139 {
140   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
141   int         ierr;
142 
143   PetscFunctionBegin;
144 #if defined(PETSC_USE_LOG)
145   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
146 #endif
147   ierr = PetscFree(a->a);CHKERRQ(ierr);
148   if (!a->singlemalloc) {
149     ierr = PetscFree(a->i);CHKERRQ(ierr);
150     ierr = PetscFree(a->j);CHKERRQ(ierr);
151   }
152   if (a->row) {
153     ierr = ISDestroy(a->row);CHKERRQ(ierr);
154   }
155   if (a->col) {
156     ierr = ISDestroy(a->col);CHKERRQ(ierr);
157   }
158   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
159   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
160   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
161   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
162   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
163   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
164   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
165 #if defined(PETSC_USE_MAT_SINGLE)
166   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
167 #endif
168   ierr = PetscFree(a);CHKERRQ(ierr);
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatSetOption_SeqBAIJ"
174 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
175 {
176   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
177 
178   PetscFunctionBegin;
179   switch (op) {
180   case MAT_ROW_ORIENTED:
181     a->roworiented    = PETSC_TRUE;
182     break;
183   case MAT_COLUMN_ORIENTED:
184     a->roworiented    = PETSC_FALSE;
185     break;
186   case MAT_COLUMNS_SORTED:
187     a->sorted         = PETSC_TRUE;
188     break;
189   case MAT_COLUMNS_UNSORTED:
190     a->sorted         = PETSC_FALSE;
191     break;
192   case MAT_KEEP_ZEROED_ROWS:
193     a->keepzeroedrows = PETSC_TRUE;
194     break;
195   case MAT_NO_NEW_NONZERO_LOCATIONS:
196     a->nonew          = 1;
197     break;
198   case MAT_NEW_NONZERO_LOCATION_ERR:
199     a->nonew          = -1;
200     break;
201   case MAT_NEW_NONZERO_ALLOCATION_ERR:
202     a->nonew          = -2;
203     break;
204   case MAT_YES_NEW_NONZERO_LOCATIONS:
205     a->nonew          = 0;
206     break;
207   case MAT_ROWS_SORTED:
208   case MAT_ROWS_UNSORTED:
209   case MAT_YES_NEW_DIAGONALS:
210   case MAT_IGNORE_OFF_PROC_ENTRIES:
211   case MAT_USE_HASH_TABLE:
212     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
213     break;
214   case MAT_NO_NEW_DIAGONALS:
215     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
216   case MAT_USE_SINGLE_PRECISION_SOLVES:
217     if (a->bs==4) {
218       PetscTruth sse_enabled_local,sse_enabled_global;
219       int        ierr;
220 
221       sse_enabled_local  = PETSC_FALSE;
222       sse_enabled_global = PETSC_FALSE;
223 
224       ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr);
225 #if defined(PETSC_HAVE_SSE)
226       if (sse_enabled_local) {
227           a->single_precision_solves = PETSC_TRUE;
228           A->ops->solve              = MatSolve_SeqBAIJ_Update;
229           A->ops->solvetranspose     = MatSolveTranspose_SeqBAIJ_Update;
230           PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n");
231           break;
232       } else {
233         PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
234       }
235 #else
236       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
237 #endif
238     }
239     break;
240   default:
241     SETERRQ(PETSC_ERR_SUP,"unknown option");
242   }
243   PetscFunctionReturn(0);
244 }
245 
246 #undef __FUNCT__
247 #define __FUNCT__ "MatGetRow_SeqBAIJ"
248 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
249 {
250   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
251   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
252   MatScalar    *aa,*aa_i;
253   PetscScalar  *v_i;
254 
255   PetscFunctionBegin;
256   bs  = a->bs;
257   ai  = a->i;
258   aj  = a->j;
259   aa  = a->a;
260   bs2 = a->bs2;
261 
262   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
263 
264   bn  = row/bs;   /* Block number */
265   bp  = row % bs; /* Block Position */
266   M   = ai[bn+1] - ai[bn];
267   *nz = bs*M;
268 
269   if (v) {
270     *v = 0;
271     if (*nz) {
272       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
273       for (i=0; i<M; i++) { /* for each block in the block row */
274         v_i  = *v + i*bs;
275         aa_i = aa + bs2*(ai[bn] + i);
276         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
277       }
278     }
279   }
280 
281   if (idx) {
282     *idx = 0;
283     if (*nz) {
284       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
285       for (i=0; i<M; i++) { /* for each block in the block row */
286         idx_i = *idx + i*bs;
287         itmp  = bs*aj[ai[bn] + i];
288         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
289       }
290     }
291   }
292   PetscFunctionReturn(0);
293 }
294 
295 #undef __FUNCT__
296 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
297 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
298 {
299   int ierr;
300 
301   PetscFunctionBegin;
302   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
303   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
304   PetscFunctionReturn(0);
305 }
306 
307 #undef __FUNCT__
308 #define __FUNCT__ "MatTranspose_SeqBAIJ"
309 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
310 {
311   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
312   Mat         C;
313   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
314   int         *rows,*cols,bs2=a->bs2;
315   PetscScalar *array;
316 
317   PetscFunctionBegin;
318   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
319   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
320   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
321 
322 #if defined(PETSC_USE_MAT_SINGLE)
323   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
324   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
325 #else
326   array = a->a;
327 #endif
328 
329   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
330   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
331   ierr = PetscFree(col);CHKERRQ(ierr);
332   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
333   cols = rows + bs;
334   for (i=0; i<mbs; i++) {
335     cols[0] = i*bs;
336     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
337     len = ai[i+1] - ai[i];
338     for (j=0; j<len; j++) {
339       rows[0] = (*aj++)*bs;
340       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
341       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
342       array += bs2;
343     }
344   }
345   ierr = PetscFree(rows);CHKERRQ(ierr);
346 #if defined(PETSC_USE_MAT_SINGLE)
347   ierr = PetscFree(array);
348 #endif
349 
350   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
351   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
352 
353   if (B) {
354     *B = C;
355   } else {
356     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
357   }
358   PetscFunctionReturn(0);
359 }
360 
361 #undef __FUNCT__
362 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
363 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
364 {
365   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
366   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
367   PetscScalar *aa;
368   FILE        *file;
369 
370   PetscFunctionBegin;
371   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
372   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
373   col_lens[0] = MAT_FILE_COOKIE;
374 
375   col_lens[1] = A->m;
376   col_lens[2] = A->n;
377   col_lens[3] = a->nz*bs2;
378 
379   /* store lengths of each row and write (including header) to file */
380   count = 0;
381   for (i=0; i<a->mbs; i++) {
382     for (j=0; j<bs; j++) {
383       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
384     }
385   }
386   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
387   ierr = PetscFree(col_lens);CHKERRQ(ierr);
388 
389   /* store column indices (zero start index) */
390   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
391   count = 0;
392   for (i=0; i<a->mbs; i++) {
393     for (j=0; j<bs; j++) {
394       for (k=a->i[i]; k<a->i[i+1]; k++) {
395         for (l=0; l<bs; l++) {
396           jj[count++] = bs*a->j[k] + l;
397         }
398       }
399     }
400   }
401   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
402   ierr = PetscFree(jj);CHKERRQ(ierr);
403 
404   /* store nonzero values */
405   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
406   count = 0;
407   for (i=0; i<a->mbs; i++) {
408     for (j=0; j<bs; j++) {
409       for (k=a->i[i]; k<a->i[i+1]; k++) {
410         for (l=0; l<bs; l++) {
411           aa[count++] = a->a[bs2*k + l*bs + j];
412         }
413       }
414     }
415   }
416   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
417   ierr = PetscFree(aa);CHKERRQ(ierr);
418 
419   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
420   if (file) {
421     fprintf(file,"-matload_block_size %d\n",a->bs);
422   }
423   PetscFunctionReturn(0);
424 }
425 
426 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer);
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
430 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
431 {
432   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
433   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
434   PetscViewerFormat format;
435 
436   PetscFunctionBegin;
437   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
438   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
439     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
440   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
441     Mat aij;
442     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
443     ierr = MatView(aij,viewer);CHKERRQ(ierr);
444     ierr = MatDestroy(aij);CHKERRQ(ierr);
445   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
446 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
447      ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr);
448 #endif
449      PetscFunctionReturn(0);
450   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
451     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
452     for (i=0; i<a->mbs; i++) {
453       for (j=0; j<bs; j++) {
454         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
455         for (k=a->i[i]; k<a->i[i+1]; k++) {
456           for (l=0; l<bs; l++) {
457 #if defined(PETSC_USE_COMPLEX)
458             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
459               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
460                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
461             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
462               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
463                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
464             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
465               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
466             }
467 #else
468             if (a->a[bs2*k + l*bs + j] != 0.0) {
469               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
470             }
471 #endif
472           }
473         }
474         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
475       }
476     }
477     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
478   } else {
479     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
480     for (i=0; i<a->mbs; i++) {
481       for (j=0; j<bs; j++) {
482         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
483         for (k=a->i[i]; k<a->i[i+1]; k++) {
484           for (l=0; l<bs; l++) {
485 #if defined(PETSC_USE_COMPLEX)
486             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
487               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
488                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
489             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
490               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
491                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
492             } else {
493               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
494             }
495 #else
496             ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
497 #endif
498           }
499         }
500         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
501       }
502     }
503     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
504   }
505   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
506   PetscFunctionReturn(0);
507 }
508 
509 #undef __FUNCT__
510 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
511 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
512 {
513   Mat          A = (Mat) Aa;
514   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
515   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
516   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
517   MatScalar    *aa;
518   PetscViewer  viewer;
519 
520   PetscFunctionBegin;
521 
522   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
523   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
524 
525   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
526 
527   /* loop over matrix elements drawing boxes */
528   color = PETSC_DRAW_BLUE;
529   for (i=0,row=0; i<mbs; i++,row+=bs) {
530     for (j=a->i[i]; j<a->i[i+1]; j++) {
531       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
532       x_l = a->j[j]*bs; x_r = x_l + 1.0;
533       aa = a->a + j*bs2;
534       for (k=0; k<bs; k++) {
535         for (l=0; l<bs; l++) {
536           if (PetscRealPart(*aa++) >=  0.) continue;
537           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
538         }
539       }
540     }
541   }
542   color = PETSC_DRAW_CYAN;
543   for (i=0,row=0; i<mbs; i++,row+=bs) {
544     for (j=a->i[i]; j<a->i[i+1]; j++) {
545       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
546       x_l = a->j[j]*bs; x_r = x_l + 1.0;
547       aa = a->a + j*bs2;
548       for (k=0; k<bs; k++) {
549         for (l=0; l<bs; l++) {
550           if (PetscRealPart(*aa++) != 0.) continue;
551           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
552         }
553       }
554     }
555   }
556 
557   color = PETSC_DRAW_RED;
558   for (i=0,row=0; i<mbs; i++,row+=bs) {
559     for (j=a->i[i]; j<a->i[i+1]; j++) {
560       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
561       x_l = a->j[j]*bs; x_r = x_l + 1.0;
562       aa = a->a + j*bs2;
563       for (k=0; k<bs; k++) {
564         for (l=0; l<bs; l++) {
565           if (PetscRealPart(*aa++) <= 0.) continue;
566           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
567         }
568       }
569     }
570   }
571   PetscFunctionReturn(0);
572 }
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
576 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
577 {
578   int          ierr;
579   PetscReal    xl,yl,xr,yr,w,h;
580   PetscDraw    draw;
581   PetscTruth   isnull;
582 
583   PetscFunctionBegin;
584 
585   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
586   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
587 
588   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
589   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
590   xr += w;    yr += h;  xl = -w;     yl = -h;
591   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
592   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
593   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
594   PetscFunctionReturn(0);
595 }
596 
597 #undef __FUNCT__
598 #define __FUNCT__ "MatView_SeqBAIJ"
599 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
600 {
601   int        ierr;
602   PetscTruth issocket,isascii,isbinary,isdraw;
603 
604   PetscFunctionBegin;
605   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
606   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
607   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
608   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
609   if (issocket) {
610     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
611   } else if (isascii){
612     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
613   } else if (isbinary) {
614     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
615   } else if (isdraw) {
616     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
617   } else {
618     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
619   }
620   PetscFunctionReturn(0);
621 }
622 
623 
624 #undef __FUNCT__
625 #define __FUNCT__ "MatGetValues_SeqBAIJ"
626 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
627 {
628   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
629   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
630   int        *ai = a->i,*ailen = a->ilen;
631   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
632   MatScalar  *ap,*aa = a->a,zero = 0.0;
633 
634   PetscFunctionBegin;
635   for (k=0; k<m; k++) { /* loop over rows */
636     row  = im[k]; brow = row/bs;
637     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
638     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
639     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
640     nrow = ailen[brow];
641     for (l=0; l<n; l++) { /* loop over columns */
642       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
643       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
644       col  = in[l] ;
645       bcol = col/bs;
646       cidx = col%bs;
647       ridx = row%bs;
648       high = nrow;
649       low  = 0; /* assume unsorted */
650       while (high-low > 5) {
651         t = (low+high)/2;
652         if (rp[t] > bcol) high = t;
653         else             low  = t;
654       }
655       for (i=low; i<high; i++) {
656         if (rp[i] > bcol) break;
657         if (rp[i] == bcol) {
658           *v++ = ap[bs2*i+bs*cidx+ridx];
659           goto finished;
660         }
661       }
662       *v++ = zero;
663       finished:;
664     }
665   }
666   PetscFunctionReturn(0);
667 }
668 
669 #if defined(PETSC_USE_MAT_SINGLE)
670 #undef __FUNCT__
671 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
672 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
673 {
674   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
675   int         ierr,i,N = m*n*b->bs2;
676   MatScalar   *vsingle;
677 
678   PetscFunctionBegin;
679   if (N > b->setvalueslen) {
680     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
681     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
682     b->setvalueslen  = N;
683   }
684   vsingle = b->setvaluescopy;
685   for (i=0; i<N; i++) {
686     vsingle[i] = v[i];
687   }
688   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
689   PetscFunctionReturn(0);
690 }
691 #endif
692 
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
696 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
697 {
698   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
699   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
700   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
701   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
702   PetscTruth  roworiented=a->roworiented;
703   MatScalar   *value = v,*ap,*aa = a->a,*bap;
704 
705   PetscFunctionBegin;
706   if (roworiented) {
707     stepval = (n-1)*bs;
708   } else {
709     stepval = (m-1)*bs;
710   }
711   for (k=0; k<m; k++) { /* loop over added rows */
712     row  = im[k];
713     if (row < 0) continue;
714 #if defined(PETSC_USE_BOPT_g)
715     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
716 #endif
717     rp   = aj + ai[row];
718     ap   = aa + bs2*ai[row];
719     rmax = imax[row];
720     nrow = ailen[row];
721     low  = 0;
722     for (l=0; l<n; l++) { /* loop over added columns */
723       if (in[l] < 0) continue;
724 #if defined(PETSC_USE_BOPT_g)
725       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
726 #endif
727       col = in[l];
728       if (roworiented) {
729         value = v + k*(stepval+bs)*bs + l*bs;
730       } else {
731         value = v + l*(stepval+bs)*bs + k*bs;
732       }
733       if (!sorted) low = 0; high = nrow;
734       while (high-low > 7) {
735         t = (low+high)/2;
736         if (rp[t] > col) high = t;
737         else             low  = t;
738       }
739       for (i=low; i<high; i++) {
740         if (rp[i] > col) break;
741         if (rp[i] == col) {
742           bap  = ap +  bs2*i;
743           if (roworiented) {
744             if (is == ADD_VALUES) {
745               for (ii=0; ii<bs; ii++,value+=stepval) {
746                 for (jj=ii; jj<bs2; jj+=bs) {
747                   bap[jj] += *value++;
748                 }
749               }
750             } else {
751               for (ii=0; ii<bs; ii++,value+=stepval) {
752                 for (jj=ii; jj<bs2; jj+=bs) {
753                   bap[jj] = *value++;
754                 }
755               }
756             }
757           } else {
758             if (is == ADD_VALUES) {
759               for (ii=0; ii<bs; ii++,value+=stepval) {
760                 for (jj=0; jj<bs; jj++) {
761                   *bap++ += *value++;
762                 }
763               }
764             } else {
765               for (ii=0; ii<bs; ii++,value+=stepval) {
766                 for (jj=0; jj<bs; jj++) {
767                   *bap++  = *value++;
768                 }
769               }
770             }
771           }
772           goto noinsert2;
773         }
774       }
775       if (nonew == 1) goto noinsert2;
776       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
777       if (nrow >= rmax) {
778         /* there is no extra room in row, therefore enlarge */
779         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
780         MatScalar *new_a;
781 
782         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
783 
784         /* malloc new storage space */
785         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
786 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
787         new_j   = (int*)(new_a + bs2*new_nz);
788         new_i   = new_j + new_nz;
789 
790         /* copy over old data into new slots */
791         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
792         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
793         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
794         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
795         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
796         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
797         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
798         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
799         /* free up old matrix storage */
800         ierr = PetscFree(a->a);CHKERRQ(ierr);
801         if (!a->singlemalloc) {
802           ierr = PetscFree(a->i);CHKERRQ(ierr);
803           ierr = PetscFree(a->j);CHKERRQ(ierr);
804         }
805         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
806         a->singlemalloc = PETSC_TRUE;
807 
808         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
809         rmax = imax[row] = imax[row] + CHUNKSIZE;
810         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
811         a->maxnz += bs2*CHUNKSIZE;
812         a->reallocs++;
813         a->nz++;
814       }
815       N = nrow++ - 1;
816       /* shift up all the later entries in this row */
817       for (ii=N; ii>=i; ii--) {
818         rp[ii+1] = rp[ii];
819         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
820       }
821       if (N >= i) {
822         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
823       }
824       rp[i] = col;
825       bap   = ap +  bs2*i;
826       if (roworiented) {
827         for (ii=0; ii<bs; ii++,value+=stepval) {
828           for (jj=ii; jj<bs2; jj+=bs) {
829             bap[jj] = *value++;
830           }
831         }
832       } else {
833         for (ii=0; ii<bs; ii++,value+=stepval) {
834           for (jj=0; jj<bs; jj++) {
835             *bap++  = *value++;
836           }
837         }
838       }
839       noinsert2:;
840       low = i;
841     }
842     ailen[row] = nrow;
843   }
844   PetscFunctionReturn(0);
845 }
846 
847 /* EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); */
848 #undef __FUNCT__
849 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
850 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
851 {
852   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
853   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
854   int        m = A->m,*ip,N,*ailen = a->ilen;
855   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
856   MatScalar  *aa = a->a,*ap;
857 #if defined(PETSC_HAVE_DSCPACK)
858   PetscTruth   flag;
859 #endif
860 
861   PetscFunctionBegin;
862   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
863 
864   if (m) rmax = ailen[0];
865   for (i=1; i<mbs; i++) {
866     /* move each row back by the amount of empty slots (fshift) before it*/
867     fshift += imax[i-1] - ailen[i-1];
868     rmax   = PetscMax(rmax,ailen[i]);
869     if (fshift) {
870       ip = aj + ai[i]; ap = aa + bs2*ai[i];
871       N = ailen[i];
872       for (j=0; j<N; j++) {
873         ip[j-fshift] = ip[j];
874         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
875       }
876     }
877     ai[i] = ai[i-1] + ailen[i-1];
878   }
879   if (mbs) {
880     fshift += imax[mbs-1] - ailen[mbs-1];
881     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
882   }
883   /* reset ilen and imax for each row */
884   for (i=0; i<mbs; i++) {
885     ailen[i] = imax[i] = ai[i+1] - ai[i];
886   }
887   a->nz = ai[mbs];
888 
889   /* diagonals may have moved, so kill the diagonal pointers */
890   if (fshift && a->diag) {
891     ierr = PetscFree(a->diag);CHKERRQ(ierr);
892     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
893     a->diag = 0;
894   }
895   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
896   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
897   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
898   a->reallocs          = 0;
899   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
900 #if defined(PETSC_HAVE_DSCPACK)
901   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr);
902   if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); }
903 #endif
904 
905 
906   PetscFunctionReturn(0);
907 }
908 
909 
910 
911 /*
912    This function returns an array of flags which indicate the locations of contiguous
913    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
914    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
915    Assume: sizes should be long enough to hold all the values.
916 */
917 #undef __FUNCT__
918 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
919 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
920 {
921   int        i,j,k,row;
922   PetscTruth flg;
923 
924   PetscFunctionBegin;
925   for (i=0,j=0; i<n; j++) {
926     row = idx[i];
927     if (row%bs!=0) { /* Not the begining of a block */
928       sizes[j] = 1;
929       i++;
930     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
931       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
932       i++;
933     } else { /* Begining of the block, so check if the complete block exists */
934       flg = PETSC_TRUE;
935       for (k=1; k<bs; k++) {
936         if (row+k != idx[i+k]) { /* break in the block */
937           flg = PETSC_FALSE;
938           break;
939         }
940       }
941       if (flg == PETSC_TRUE) { /* No break in the bs */
942         sizes[j] = bs;
943         i+= bs;
944       } else {
945         sizes[j] = 1;
946         i++;
947       }
948     }
949   }
950   *bs_max = j;
951   PetscFunctionReturn(0);
952 }
953 
954 #undef __FUNCT__
955 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
956 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag)
957 {
958   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
959   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
960   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
961   PetscScalar zero = 0.0;
962   MatScalar   *aa;
963 
964   PetscFunctionBegin;
965   /* Make a copy of the IS and  sort it */
966   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
967   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
968 
969   /* allocate memory for rows,sizes */
970   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
971   sizes = rows + is_n;
972 
973   /* copy IS values to rows, and sort them */
974   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
975   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
976   if (baij->keepzeroedrows) {
977     for (i=0; i<is_n; i++) { sizes[i] = 1; }
978     bs_max = is_n;
979   } else {
980     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
981   }
982   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
983 
984   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
985     row   = rows[j];
986     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
987     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
988     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
989     if (sizes[i] == bs && !baij->keepzeroedrows) {
990       if (diag) {
991         if (baij->ilen[row/bs] > 0) {
992           baij->ilen[row/bs]       = 1;
993           baij->j[baij->i[row/bs]] = row/bs;
994           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
995         }
996         /* Now insert all the diagonal values for this bs */
997         for (k=0; k<bs; k++) {
998           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
999         }
1000       } else { /* (!diag) */
1001         baij->ilen[row/bs] = 0;
1002       } /* end (!diag) */
1003     } else { /* (sizes[i] != bs) */
1004 #if defined (PETSC_USE_DEBUG)
1005       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1006 #endif
1007       for (k=0; k<count; k++) {
1008         aa[0] =  zero;
1009         aa    += bs;
1010       }
1011       if (diag) {
1012         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1013       }
1014     }
1015   }
1016 
1017   ierr = PetscFree(rows);CHKERRQ(ierr);
1018   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 #undef __FUNCT__
1023 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1024 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
1025 {
1026   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1027   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1028   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1029   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1030   int         ridx,cidx,bs2=a->bs2,ierr;
1031   PetscTruth  roworiented=a->roworiented;
1032   MatScalar   *ap,value,*aa=a->a,*bap;
1033 
1034   PetscFunctionBegin;
1035   for (k=0; k<m; k++) { /* loop over added rows */
1036     row  = im[k]; brow = row/bs;
1037     if (row < 0) continue;
1038 #if defined(PETSC_USE_BOPT_g)
1039     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1040 #endif
1041     rp   = aj + ai[brow];
1042     ap   = aa + bs2*ai[brow];
1043     rmax = imax[brow];
1044     nrow = ailen[brow];
1045     low  = 0;
1046     for (l=0; l<n; l++) { /* loop over added columns */
1047       if (in[l] < 0) continue;
1048 #if defined(PETSC_USE_BOPT_g)
1049       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1050 #endif
1051       col = in[l]; bcol = col/bs;
1052       ridx = row % bs; cidx = col % bs;
1053       if (roworiented) {
1054         value = v[l + k*n];
1055       } else {
1056         value = v[k + l*m];
1057       }
1058       if (!sorted) low = 0; high = nrow;
1059       while (high-low > 7) {
1060         t = (low+high)/2;
1061         if (rp[t] > bcol) high = t;
1062         else              low  = t;
1063       }
1064       for (i=low; i<high; i++) {
1065         if (rp[i] > bcol) break;
1066         if (rp[i] == bcol) {
1067           bap  = ap +  bs2*i + bs*cidx + ridx;
1068           if (is == ADD_VALUES) *bap += value;
1069           else                  *bap  = value;
1070           goto noinsert1;
1071         }
1072       }
1073       if (nonew == 1) goto noinsert1;
1074       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1075       if (nrow >= rmax) {
1076         /* there is no extra room in row, therefore enlarge */
1077         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1078         MatScalar *new_a;
1079 
1080         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1081 
1082         /* Malloc new storage space */
1083         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1084 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1085         new_j   = (int*)(new_a + bs2*new_nz);
1086         new_i   = new_j + new_nz;
1087 
1088         /* copy over old data into new slots */
1089         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1090         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1091         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1092         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1093         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1094         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1095         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1096         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1097         /* free up old matrix storage */
1098         ierr = PetscFree(a->a);CHKERRQ(ierr);
1099         if (!a->singlemalloc) {
1100           ierr = PetscFree(a->i);CHKERRQ(ierr);
1101           ierr = PetscFree(a->j);CHKERRQ(ierr);
1102         }
1103         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1104         a->singlemalloc = PETSC_TRUE;
1105 
1106         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1107         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1108         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1109         a->maxnz += bs2*CHUNKSIZE;
1110         a->reallocs++;
1111         a->nz++;
1112       }
1113       N = nrow++ - 1;
1114       /* shift up all the later entries in this row */
1115       for (ii=N; ii>=i; ii--) {
1116         rp[ii+1] = rp[ii];
1117         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1118       }
1119       if (N>=i) {
1120         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1121       }
1122       rp[i]                      = bcol;
1123       ap[bs2*i + bs*cidx + ridx] = value;
1124       noinsert1:;
1125       low = i;
1126     }
1127     ailen[brow] = nrow;
1128   }
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1135 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1136 {
1137   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1138   Mat         outA;
1139   int         ierr;
1140   PetscTruth  row_identity,col_identity;
1141 
1142   PetscFunctionBegin;
1143   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1144   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1145   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1146   if (!row_identity || !col_identity) {
1147     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1148   }
1149 
1150   outA          = inA;
1151   inA->factor   = FACTOR_LU;
1152 
1153   if (!a->diag) {
1154     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1155   }
1156 
1157   a->row        = row;
1158   a->col        = col;
1159   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1160   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1161 
1162   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1163   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1164   PetscLogObjectParent(inA,a->icol);
1165 
1166   /*
1167       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1168       for ILU(0) factorization with natural ordering
1169   */
1170   if (a->bs < 8) {
1171     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1172   } else {
1173     if (!a->solve_work) {
1174       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1175       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1176     }
1177   }
1178 
1179   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1180 
1181   PetscFunctionReturn(0);
1182 }
1183 #undef __FUNCT__
1184 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1185 int MatPrintHelp_SeqBAIJ(Mat A)
1186 {
1187   static PetscTruth called = PETSC_FALSE;
1188   MPI_Comm          comm = A->comm;
1189   int               ierr;
1190 
1191   PetscFunctionBegin;
1192   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1193   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1194   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 EXTERN_C_BEGIN
1199 #undef __FUNCT__
1200 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1201 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1202 {
1203   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1204   int         i,nz,nbs;
1205 
1206   PetscFunctionBegin;
1207   nz  = baij->maxnz/baij->bs2;
1208   nbs = baij->nbs;
1209   for (i=0; i<nz; i++) {
1210     baij->j[i] = indices[i];
1211   }
1212   baij->nz = nz;
1213   for (i=0; i<nbs; i++) {
1214     baij->ilen[i] = baij->imax[i];
1215   }
1216 
1217   PetscFunctionReturn(0);
1218 }
1219 EXTERN_C_END
1220 
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1223 /*@
1224     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1225        in the matrix.
1226 
1227   Input Parameters:
1228 +  mat - the SeqBAIJ matrix
1229 -  indices - the column indices
1230 
1231   Level: advanced
1232 
1233   Notes:
1234     This can be called if you have precomputed the nonzero structure of the
1235   matrix and want to provide it to the matrix object to improve the performance
1236   of the MatSetValues() operation.
1237 
1238     You MUST have set the correct numbers of nonzeros per row in the call to
1239   MatCreateSeqBAIJ().
1240 
1241     MUST be called before any calls to MatSetValues();
1242 
1243 @*/
1244 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1245 {
1246   int ierr,(*f)(Mat,int *);
1247 
1248   PetscFunctionBegin;
1249   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1250   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1251   if (f) {
1252     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1253   } else {
1254     SETERRQ(1,"Wrong type of matrix to set column indices");
1255   }
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1261 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1262 {
1263   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1264   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1265   PetscReal    atmp;
1266   PetscScalar  *x,zero = 0.0;
1267   MatScalar    *aa;
1268   int          ncols,brow,krow,kcol;
1269 
1270   PetscFunctionBegin;
1271   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1272   bs   = a->bs;
1273   aa   = a->a;
1274   ai   = a->i;
1275   aj   = a->j;
1276   mbs = a->mbs;
1277 
1278   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1279   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1280   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1281   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1282   for (i=0; i<mbs; i++) {
1283     ncols = ai[1] - ai[0]; ai++;
1284     brow  = bs*i;
1285     for (j=0; j<ncols; j++){
1286       /* bcol = bs*(*aj); */
1287       for (kcol=0; kcol<bs; kcol++){
1288         for (krow=0; krow<bs; krow++){
1289           atmp = PetscAbsScalar(*aa); aa++;
1290           row = brow + krow;    /* row index */
1291           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1292           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1293         }
1294       }
1295       aj++;
1296     }
1297   }
1298   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 #undef __FUNCT__
1303 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1304 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1305 {
1306   int        ierr;
1307 
1308   PetscFunctionBegin;
1309   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1315 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array)
1316 {
1317   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1318   PetscFunctionBegin;
1319   *array = a->a;
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1325 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array)
1326 {
1327   PetscFunctionBegin;
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 /* -------------------------------------------------------------------*/
1332 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1333        MatGetRow_SeqBAIJ,
1334        MatRestoreRow_SeqBAIJ,
1335        MatMult_SeqBAIJ_N,
1336        MatMultAdd_SeqBAIJ_N,
1337        MatMultTranspose_SeqBAIJ,
1338        MatMultTransposeAdd_SeqBAIJ,
1339        MatSolve_SeqBAIJ_N,
1340        0,
1341        0,
1342        0,
1343        MatLUFactor_SeqBAIJ,
1344        0,
1345        0,
1346        MatTranspose_SeqBAIJ,
1347        MatGetInfo_SeqBAIJ,
1348        MatEqual_SeqBAIJ,
1349        MatGetDiagonal_SeqBAIJ,
1350        MatDiagonalScale_SeqBAIJ,
1351        MatNorm_SeqBAIJ,
1352        0,
1353        MatAssemblyEnd_SeqBAIJ,
1354        0,
1355        MatSetOption_SeqBAIJ,
1356        MatZeroEntries_SeqBAIJ,
1357        MatZeroRows_SeqBAIJ,
1358        MatLUFactorSymbolic_SeqBAIJ,
1359        MatLUFactorNumeric_SeqBAIJ_N,
1360        0,
1361        0,
1362        MatSetUpPreallocation_SeqBAIJ,
1363        MatILUFactorSymbolic_SeqBAIJ,
1364        0,
1365        MatGetArray_SeqBAIJ,
1366        MatRestoreArray_SeqBAIJ,
1367        MatDuplicate_SeqBAIJ,
1368        0,
1369        0,
1370        MatILUFactor_SeqBAIJ,
1371        0,
1372        0,
1373        MatGetSubMatrices_SeqBAIJ,
1374        MatIncreaseOverlap_SeqBAIJ,
1375        MatGetValues_SeqBAIJ,
1376        0,
1377        MatPrintHelp_SeqBAIJ,
1378        MatScale_SeqBAIJ,
1379        0,
1380        0,
1381        0,
1382        MatGetBlockSize_SeqBAIJ,
1383        MatGetRowIJ_SeqBAIJ,
1384        MatRestoreRowIJ_SeqBAIJ,
1385        0,
1386        0,
1387        0,
1388        0,
1389        0,
1390        0,
1391        MatSetValuesBlocked_SeqBAIJ,
1392        MatGetSubMatrix_SeqBAIJ,
1393        MatDestroy_SeqBAIJ,
1394        MatView_SeqBAIJ,
1395        MatGetPetscMaps_Petsc,
1396        0,
1397        0,
1398        0,
1399        0,
1400        0,
1401        0,
1402        MatGetRowMax_SeqBAIJ,
1403        MatConvert_Basic};
1404 
1405 EXTERN_C_BEGIN
1406 #undef __FUNCT__
1407 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1408 int MatStoreValues_SeqBAIJ(Mat mat)
1409 {
1410   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1411   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1412   int         ierr;
1413 
1414   PetscFunctionBegin;
1415   if (aij->nonew != 1) {
1416     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1417   }
1418 
1419   /* allocate space for values if not already there */
1420   if (!aij->saved_values) {
1421     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1422   }
1423 
1424   /* copy values over */
1425   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 EXTERN_C_END
1429 
1430 EXTERN_C_BEGIN
1431 #undef __FUNCT__
1432 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1433 int MatRetrieveValues_SeqBAIJ(Mat mat)
1434 {
1435   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1436   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1437 
1438   PetscFunctionBegin;
1439   if (aij->nonew != 1) {
1440     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1441   }
1442   if (!aij->saved_values) {
1443     SETERRQ(1,"Must call MatStoreValues(A);first");
1444   }
1445 
1446   /* copy values over */
1447   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1448   PetscFunctionReturn(0);
1449 }
1450 EXTERN_C_END
1451 
1452 EXTERN_C_BEGIN
1453 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1454 EXTERN_C_END
1455 
1456 EXTERN_C_BEGIN
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatCreate_SeqBAIJ"
1459 int MatCreate_SeqBAIJ(Mat B)
1460 {
1461   int         ierr,size;
1462   Mat_SeqBAIJ *b;
1463 
1464   PetscFunctionBegin;
1465   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1466   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1467 
1468   B->m = B->M = PetscMax(B->m,B->M);
1469   B->n = B->N = PetscMax(B->n,B->N);
1470   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1471   B->data = (void*)b;
1472   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1473   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1474   B->factor           = 0;
1475   B->lupivotthreshold = 1.0;
1476   B->mapping          = 0;
1477   b->row              = 0;
1478   b->col              = 0;
1479   b->icol             = 0;
1480   b->reallocs         = 0;
1481   b->saved_values     = 0;
1482 #if defined(PETSC_USE_MAT_SINGLE)
1483   b->setvalueslen     = 0;
1484   b->setvaluescopy    = PETSC_NULL;
1485 #endif
1486   b->single_precision_solves = PETSC_FALSE;
1487 
1488   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1489   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1490 
1491   b->sorted           = PETSC_FALSE;
1492   b->roworiented      = PETSC_TRUE;
1493   b->nonew            = 0;
1494   b->diag             = 0;
1495   b->solve_work       = 0;
1496   b->mult_work        = 0;
1497   B->spptr            = 0;
1498   B->info.nz_unneeded = (PetscReal)b->maxnz;
1499   b->keepzeroedrows   = PETSC_FALSE;
1500 
1501   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1502                                      "MatStoreValues_SeqBAIJ",
1503                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1504   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1505                                      "MatRetrieveValues_SeqBAIJ",
1506                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1507   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1508                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1509                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1510   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1511                                      "MatConvert_SeqBAIJ_SeqAIJ",
1512                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1513   PetscFunctionReturn(0);
1514 }
1515 EXTERN_C_END
1516 
1517 #undef __FUNCT__
1518 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1519 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1520 {
1521   Mat         C;
1522   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1523   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1524 
1525   PetscFunctionBegin;
1526   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1527 
1528   *B = 0;
1529   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1530   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1531   c    = (Mat_SeqBAIJ*)C->data;
1532 
1533   c->bs         = a->bs;
1534   c->bs2        = a->bs2;
1535   c->mbs        = a->mbs;
1536   c->nbs        = a->nbs;
1537   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1538 
1539   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1540   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1541   for (i=0; i<mbs; i++) {
1542     c->imax[i] = a->imax[i];
1543     c->ilen[i] = a->ilen[i];
1544   }
1545 
1546   /* allocate the matrix space */
1547   c->singlemalloc = PETSC_TRUE;
1548   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1549   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1550   c->j = (int*)(c->a + nz*bs2);
1551   c->i = c->j + nz;
1552   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1553   if (mbs > 0) {
1554     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1555     if (cpvalues == MAT_COPY_VALUES) {
1556       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1557     } else {
1558       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1559     }
1560   }
1561 
1562   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1563   c->sorted      = a->sorted;
1564   c->roworiented = a->roworiented;
1565   c->nonew       = a->nonew;
1566 
1567   if (a->diag) {
1568     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1569     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1570     for (i=0; i<mbs; i++) {
1571       c->diag[i] = a->diag[i];
1572     }
1573   } else c->diag        = 0;
1574   c->nz                 = a->nz;
1575   c->maxnz              = a->maxnz;
1576   c->solve_work         = 0;
1577   C->spptr              = 0;     /* Dangerous -I'm throwing away a->spptr */
1578   c->mult_work          = 0;
1579   C->preallocated       = PETSC_TRUE;
1580   C->assembled          = PETSC_TRUE;
1581   *B = C;
1582   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 EXTERN_C_BEGIN
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatLoad_SeqBAIJ"
1589 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1590 {
1591   Mat_SeqBAIJ  *a;
1592   Mat          B;
1593   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1594   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1595   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1596   int          *masked,nmask,tmp,bs2,ishift;
1597   PetscScalar  *aa;
1598   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1599 
1600   PetscFunctionBegin;
1601   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1602   bs2  = bs*bs;
1603 
1604   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1605   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1606   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1607   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1608   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1609   M = header[1]; N = header[2]; nz = header[3];
1610 
1611   if (header[3] < 0) {
1612     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1613   }
1614 
1615   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1616 
1617   /*
1618      This code adds extra rows to make sure the number of rows is
1619     divisible by the blocksize
1620   */
1621   mbs        = M/bs;
1622   extra_rows = bs - M + bs*(mbs);
1623   if (extra_rows == bs) extra_rows = 0;
1624   else                  mbs++;
1625   if (extra_rows) {
1626     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1627   }
1628 
1629   /* read in row lengths */
1630   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1631   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1632   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1633 
1634   /* read in column indices */
1635   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1636   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1637   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1638 
1639   /* loop over row lengths determining block row lengths */
1640   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1641   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1642   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1643   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1644   masked   = mask + mbs;
1645   rowcount = 0; nzcount = 0;
1646   for (i=0; i<mbs; i++) {
1647     nmask = 0;
1648     for (j=0; j<bs; j++) {
1649       kmax = rowlengths[rowcount];
1650       for (k=0; k<kmax; k++) {
1651         tmp = jj[nzcount++]/bs;
1652         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1653       }
1654       rowcount++;
1655     }
1656     browlengths[i] += nmask;
1657     /* zero out the mask elements we set */
1658     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1659   }
1660 
1661   /* create our matrix */
1662   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1663   B = *A;
1664   a = (Mat_SeqBAIJ*)B->data;
1665 
1666   /* set matrix "i" values */
1667   a->i[0] = 0;
1668   for (i=1; i<= mbs; i++) {
1669     a->i[i]      = a->i[i-1] + browlengths[i-1];
1670     a->ilen[i-1] = browlengths[i-1];
1671   }
1672   a->nz         = 0;
1673   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1674 
1675   /* read in nonzero values */
1676   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1677   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1678   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1679 
1680   /* set "a" and "j" values into matrix */
1681   nzcount = 0; jcount = 0;
1682   for (i=0; i<mbs; i++) {
1683     nzcountb = nzcount;
1684     nmask    = 0;
1685     for (j=0; j<bs; j++) {
1686       kmax = rowlengths[i*bs+j];
1687       for (k=0; k<kmax; k++) {
1688         tmp = jj[nzcount++]/bs;
1689 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1690       }
1691     }
1692     /* sort the masked values */
1693     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1694 
1695     /* set "j" values into matrix */
1696     maskcount = 1;
1697     for (j=0; j<nmask; j++) {
1698       a->j[jcount++]  = masked[j];
1699       mask[masked[j]] = maskcount++;
1700     }
1701     /* set "a" values into matrix */
1702     ishift = bs2*a->i[i];
1703     for (j=0; j<bs; j++) {
1704       kmax = rowlengths[i*bs+j];
1705       for (k=0; k<kmax; k++) {
1706         tmp       = jj[nzcountb]/bs ;
1707         block     = mask[tmp] - 1;
1708         point     = jj[nzcountb] - bs*tmp;
1709         idx       = ishift + bs2*block + j + bs*point;
1710         a->a[idx] = (MatScalar)aa[nzcountb++];
1711       }
1712     }
1713     /* zero out the mask elements we set */
1714     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1715   }
1716   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1717 
1718   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1719   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1720   ierr = PetscFree(aa);CHKERRQ(ierr);
1721   ierr = PetscFree(jj);CHKERRQ(ierr);
1722   ierr = PetscFree(mask);CHKERRQ(ierr);
1723 
1724   B->assembled = PETSC_TRUE;
1725 
1726   ierr = MatView_Private(B);CHKERRQ(ierr);
1727   PetscFunctionReturn(0);
1728 }
1729 EXTERN_C_END
1730 
1731 #undef __FUNCT__
1732 #define __FUNCT__ "MatCreateSeqBAIJ"
1733 /*@C
1734    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1735    compressed row) format.  For good matrix assembly performance the
1736    user should preallocate the matrix storage by setting the parameter nz
1737    (or the array nnz).  By setting these parameters accurately, performance
1738    during matrix assembly can be increased by more than a factor of 50.
1739 
1740    Collective on MPI_Comm
1741 
1742    Input Parameters:
1743 +  comm - MPI communicator, set to PETSC_COMM_SELF
1744 .  bs - size of block
1745 .  m - number of rows
1746 .  n - number of columns
1747 .  nz - number of nonzero blocks  per block row (same for all rows)
1748 -  nnz - array containing the number of nonzero blocks in the various block rows
1749          (possibly different for each block row) or PETSC_NULL
1750 
1751    Output Parameter:
1752 .  A - the matrix
1753 
1754    Options Database Keys:
1755 .   -mat_no_unroll - uses code that does not unroll the loops in the
1756                      block calculations (much slower)
1757 .    -mat_block_size - size of the blocks to use
1758 
1759    Level: intermediate
1760 
1761    Notes:
1762    A nonzero block is any block that as 1 or more nonzeros in it
1763 
1764    The block AIJ format is fully compatible with standard Fortran 77
1765    storage.  That is, the stored row and column indices can begin at
1766    either one (as in Fortran) or zero.  See the users' manual for details.
1767 
1768    Specify the preallocated storage with either nz or nnz (not both).
1769    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1770    allocation.  For additional details, see the users manual chapter on
1771    matrices.
1772 
1773 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1774 @*/
1775 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1776 {
1777   int ierr;
1778 
1779   PetscFunctionBegin;
1780   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1781   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
1782   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1783   PetscFunctionReturn(0);
1784 }
1785 
1786 #undef __FUNCT__
1787 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
1788 /*@C
1789    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1790    per row in the matrix. For good matrix assembly performance the
1791    user should preallocate the matrix storage by setting the parameter nz
1792    (or the array nnz).  By setting these parameters accurately, performance
1793    during matrix assembly can be increased by more than a factor of 50.
1794 
1795    Collective on MPI_Comm
1796 
1797    Input Parameters:
1798 +  A - the matrix
1799 .  bs - size of block
1800 .  nz - number of block nonzeros per block row (same for all rows)
1801 -  nnz - array containing the number of block nonzeros in the various block rows
1802          (possibly different for each block row) or PETSC_NULL
1803 
1804    Options Database Keys:
1805 .   -mat_no_unroll - uses code that does not unroll the loops in the
1806                      block calculations (much slower)
1807 .    -mat_block_size - size of the blocks to use
1808 
1809    Level: intermediate
1810 
1811    Notes:
1812    The block AIJ format is fully compatible with standard Fortran 77
1813    storage.  That is, the stored row and column indices can begin at
1814    either one (as in Fortran) or zero.  See the users' manual for details.
1815 
1816    Specify the preallocated storage with either nz or nnz (not both).
1817    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1818    allocation.  For additional details, see the users manual chapter on
1819    matrices.
1820 
1821 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1822 @*/
1823 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1824 {
1825   Mat_SeqBAIJ *b;
1826   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1827   PetscTruth  flg;
1828 
1829   PetscFunctionBegin;
1830   ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr);
1831   if (!flg) PetscFunctionReturn(0);
1832 
1833   B->preallocated = PETSC_TRUE;
1834   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1835   if (nnz && newbs != bs) {
1836     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1837   }
1838   bs   = newbs;
1839 
1840   mbs  = B->m/bs;
1841   nbs  = B->n/bs;
1842   bs2  = bs*bs;
1843 
1844   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1845     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1846   }
1847 
1848   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1849   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1850   if (nnz) {
1851     for (i=0; i<mbs; i++) {
1852       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1853       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1854     }
1855   }
1856 
1857   b       = (Mat_SeqBAIJ*)B->data;
1858   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1859   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1860   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1861   if (!flg) {
1862     switch (bs) {
1863     case 1:
1864       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1865       B->ops->mult            = MatMult_SeqBAIJ_1;
1866       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1867       break;
1868     case 2:
1869       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1870       B->ops->mult            = MatMult_SeqBAIJ_2;
1871       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1872       break;
1873     case 3:
1874       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1875       B->ops->mult            = MatMult_SeqBAIJ_3;
1876       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1877       break;
1878     case 4:
1879       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1880       B->ops->mult            = MatMult_SeqBAIJ_4;
1881       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1882       break;
1883     case 5:
1884       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1885       B->ops->mult            = MatMult_SeqBAIJ_5;
1886       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1887       break;
1888     case 6:
1889       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1890       B->ops->mult            = MatMult_SeqBAIJ_6;
1891       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1892       break;
1893     case 7:
1894       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1895       B->ops->mult            = MatMult_SeqBAIJ_7;
1896       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1897       break;
1898     default:
1899       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1900       B->ops->mult            = MatMult_SeqBAIJ_N;
1901       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1902       break;
1903     }
1904   }
1905   b->bs      = bs;
1906   b->mbs     = mbs;
1907   b->nbs     = nbs;
1908   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1909   if (!nnz) {
1910     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1911     else if (nz <= 0)        nz = 1;
1912     for (i=0; i<mbs; i++) b->imax[i] = nz;
1913     nz = nz*mbs;
1914   } else {
1915     nz = 0;
1916     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1917   }
1918 
1919   /* allocate the matrix space */
1920   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1921   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1922   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1923   b->j  = (int*)(b->a + nz*bs2);
1924   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1925   b->i  = b->j + nz;
1926   b->singlemalloc = PETSC_TRUE;
1927 
1928   b->i[0] = 0;
1929   for (i=1; i<mbs+1; i++) {
1930     b->i[i] = b->i[i-1] + b->imax[i-1];
1931   }
1932 
1933   /* b->ilen will count nonzeros in each block row so far. */
1934   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1935   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1936   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1937 
1938   b->bs               = bs;
1939   b->bs2              = bs2;
1940   b->mbs              = mbs;
1941   b->nz               = 0;
1942   b->maxnz            = nz*bs2;
1943   B->info.nz_unneeded = (PetscReal)b->maxnz;
1944   PetscFunctionReturn(0);
1945 }
1946 
1947