xref: /petsc/src/mat/impls/baij/seq/baij.c (revision f6cc79bd0334691d264e95eacfaee85c3da0d04a)
1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/vec/vecimpl.h"
9 #include "src/inline/spops.h"
10 #include "petscsys.h"                     /*I "petscmat.h" I*/
11 
12 /*  UGLY, ugly, ugly
13    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
14    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
15    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
16    converts the entries into single precision and then calls ..._MatScalar() to put them
17    into the single precision data structures.
18 */
19 #if defined(PETSC_USE_MAT_SINGLE)
20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
21 #else
22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
23 #endif
24 
25 #define CHUNKSIZE  10
26 
27 /*
28      Checks for missing diagonals
29 */
30 #undef __FUNCT__
31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
32 int MatMissingDiagonal_SeqBAIJ(Mat A)
33 {
34   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
35   int         *diag,*jj = a->j,i,ierr;
36 
37   PetscFunctionBegin;
38   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
39   diag = a->diag;
40   for (i=0; i<a->mbs; i++) {
41     if (jj[diag[i]] != i) {
42       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
43     }
44   }
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
50 int MatMarkDiagonal_SeqBAIJ(Mat A)
51 {
52   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
53   int         i,j,*diag,m = a->mbs,ierr;
54 
55   PetscFunctionBegin;
56   if (a->diag) PetscFunctionReturn(0);
57 
58   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
59   PetscLogObjectMemory(A,(m+1)*sizeof(int));
60   for (i=0; i<m; i++) {
61     diag[i] = a->i[i+1];
62     for (j=a->i[i]; j<a->i[i+1]; j++) {
63       if (a->j[j] == i) {
64         diag[i] = j;
65         break;
66       }
67     }
68   }
69   a->diag = diag;
70   PetscFunctionReturn(0);
71 }
72 
73 
74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
79 {
80   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
81   int         ierr,n = a->mbs,i;
82 
83   PetscFunctionBegin;
84   *nn = n;
85   if (!ia) PetscFunctionReturn(0);
86   if (symmetric) {
87     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
88   } else if (oshift == 1) {
89     /* temporarily add 1 to i and j indices */
90     int nz = a->i[n];
91     for (i=0; i<nz; i++) a->j[i]++;
92     for (i=0; i<n+1; i++) a->i[i]++;
93     *ia = a->i; *ja = a->j;
94   } else {
95     *ia = a->i; *ja = a->j;
96   }
97 
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
104 {
105   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
106   int         i,n = a->mbs,ierr;
107 
108   PetscFunctionBegin;
109   if (!ia) PetscFunctionReturn(0);
110   if (symmetric) {
111     ierr = PetscFree(*ia);CHKERRQ(ierr);
112     ierr = PetscFree(*ja);CHKERRQ(ierr);
113   } else if (oshift == 1) {
114     int nz = a->i[n]-1;
115     for (i=0; i<nz; i++) a->j[i]--;
116     for (i=0; i<n+1; i++) a->i[i]--;
117   }
118   PetscFunctionReturn(0);
119 }
120 
121 #undef __FUNCT__
122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
124 {
125   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
126 
127   PetscFunctionBegin;
128   *bs = baij->bs;
129   PetscFunctionReturn(0);
130 }
131 
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatDestroy_SeqBAIJ"
135 int MatDestroy_SeqBAIJ(Mat A)
136 {
137   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
138   int         ierr;
139 
140   PetscFunctionBegin;
141 #if defined(PETSC_USE_LOG)
142   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
143 #endif
144   ierr = PetscFree(a->a);CHKERRQ(ierr);
145   if (!a->singlemalloc) {
146     ierr = PetscFree(a->i);CHKERRQ(ierr);
147     ierr = PetscFree(a->j);CHKERRQ(ierr);
148   }
149   if (a->row) {
150     ierr = ISDestroy(a->row);CHKERRQ(ierr);
151   }
152   if (a->col) {
153     ierr = ISDestroy(a->col);CHKERRQ(ierr);
154   }
155   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
156   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
157   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
158   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
159   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
160   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
161   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
162 #if defined(PETSC_USE_MAT_SINGLE)
163   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
164 #endif
165   ierr = PetscFree(a);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "MatSetOption_SeqBAIJ"
171 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
172 {
173   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
174 
175   PetscFunctionBegin;
176   switch (op) {
177   case MAT_ROW_ORIENTED:
178     a->roworiented    = PETSC_TRUE;
179     break;
180   case MAT_COLUMN_ORIENTED:
181     a->roworiented    = PETSC_FALSE;
182     break;
183   case MAT_COLUMNS_SORTED:
184     a->sorted         = PETSC_TRUE;
185     break;
186   case MAT_COLUMNS_UNSORTED:
187     a->sorted         = PETSC_FALSE;
188     break;
189   case MAT_KEEP_ZEROED_ROWS:
190     a->keepzeroedrows = PETSC_TRUE;
191     break;
192   case MAT_NO_NEW_NONZERO_LOCATIONS:
193     a->nonew          = 1;
194     break;
195   case MAT_NEW_NONZERO_LOCATION_ERR:
196     a->nonew          = -1;
197     break;
198   case MAT_NEW_NONZERO_ALLOCATION_ERR:
199     a->nonew          = -2;
200     break;
201   case MAT_YES_NEW_NONZERO_LOCATIONS:
202     a->nonew          = 0;
203     break;
204   case MAT_ROWS_SORTED:
205   case MAT_ROWS_UNSORTED:
206   case MAT_YES_NEW_DIAGONALS:
207   case MAT_IGNORE_OFF_PROC_ENTRIES:
208   case MAT_USE_HASH_TABLE:
209     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
210     break;
211   case MAT_NO_NEW_DIAGONALS:
212     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
213     break;
214   case MAT_USE_SINGLE_PRECISION_SOLVES:
215     if (a->bs==4) {
216       PetscTruth sse_enabled_local,sse_enabled_global;
217       int        ierr;
218 
219       sse_enabled_local  = PETSC_FALSE;
220       sse_enabled_global = PETSC_FALSE;
221 
222       ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr);
223 #if defined(PETSC_HAVE_SSE)
224       if (sse_enabled_local) {
225           a->single_precision_solves = PETSC_TRUE;
226           A->ops->solve              = MatSolve_SeqBAIJ_Update;
227           A->ops->solvetranspose     = MatSolveTranspose_SeqBAIJ_Update;
228           PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n");
229           break;
230       } else {
231         PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
232       }
233 #else
234       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
235 #endif
236     }
237     break;
238   default:
239     SETERRQ(PETSC_ERR_SUP,"unknown option");
240   }
241   PetscFunctionReturn(0);
242 }
243 
244 #undef __FUNCT__
245 #define __FUNCT__ "MatGetRow_SeqBAIJ"
246 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
247 {
248   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
249   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
250   MatScalar    *aa,*aa_i;
251   PetscScalar  *v_i;
252 
253   PetscFunctionBegin;
254   bs  = a->bs;
255   ai  = a->i;
256   aj  = a->j;
257   aa  = a->a;
258   bs2 = a->bs2;
259 
260   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
261 
262   bn  = row/bs;   /* Block number */
263   bp  = row % bs; /* Block Position */
264   M   = ai[bn+1] - ai[bn];
265   *nz = bs*M;
266 
267   if (v) {
268     *v = 0;
269     if (*nz) {
270       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
271       for (i=0; i<M; i++) { /* for each block in the block row */
272         v_i  = *v + i*bs;
273         aa_i = aa + bs2*(ai[bn] + i);
274         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
275       }
276     }
277   }
278 
279   if (idx) {
280     *idx = 0;
281     if (*nz) {
282       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
283       for (i=0; i<M; i++) { /* for each block in the block row */
284         idx_i = *idx + i*bs;
285         itmp  = bs*aj[ai[bn] + i];
286         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
287       }
288     }
289   }
290   PetscFunctionReturn(0);
291 }
292 
293 #undef __FUNCT__
294 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
295 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
296 {
297   int ierr;
298 
299   PetscFunctionBegin;
300   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
301   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
302   PetscFunctionReturn(0);
303 }
304 
305 #undef __FUNCT__
306 #define __FUNCT__ "MatTranspose_SeqBAIJ"
307 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
308 {
309   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
310   Mat         C;
311   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
312   int         *rows,*cols,bs2=a->bs2;
313   PetscScalar *array;
314 
315   PetscFunctionBegin;
316   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
317   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
318   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
319 
320 #if defined(PETSC_USE_MAT_SINGLE)
321   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
322   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
323 #else
324   array = a->a;
325 #endif
326 
327   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
328   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
329   ierr = PetscFree(col);CHKERRQ(ierr);
330   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
331   cols = rows + bs;
332   for (i=0; i<mbs; i++) {
333     cols[0] = i*bs;
334     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
335     len = ai[i+1] - ai[i];
336     for (j=0; j<len; j++) {
337       rows[0] = (*aj++)*bs;
338       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
339       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
340       array += bs2;
341     }
342   }
343   ierr = PetscFree(rows);CHKERRQ(ierr);
344 #if defined(PETSC_USE_MAT_SINGLE)
345   ierr = PetscFree(array);
346 #endif
347 
348   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
349   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
350 
351   if (B) {
352     *B = C;
353   } else {
354     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
355   }
356   PetscFunctionReturn(0);
357 }
358 
359 #undef __FUNCT__
360 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
361 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
362 {
363   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
364   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
365   PetscScalar *aa;
366   FILE        *file;
367 
368   PetscFunctionBegin;
369   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
370   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
371   col_lens[0] = MAT_FILE_COOKIE;
372 
373   col_lens[1] = A->m;
374   col_lens[2] = A->n;
375   col_lens[3] = a->nz*bs2;
376 
377   /* store lengths of each row and write (including header) to file */
378   count = 0;
379   for (i=0; i<a->mbs; i++) {
380     for (j=0; j<bs; j++) {
381       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
382     }
383   }
384   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
385   ierr = PetscFree(col_lens);CHKERRQ(ierr);
386 
387   /* store column indices (zero start index) */
388   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
389   count = 0;
390   for (i=0; i<a->mbs; i++) {
391     for (j=0; j<bs; j++) {
392       for (k=a->i[i]; k<a->i[i+1]; k++) {
393         for (l=0; l<bs; l++) {
394           jj[count++] = bs*a->j[k] + l;
395         }
396       }
397     }
398   }
399   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
400   ierr = PetscFree(jj);CHKERRQ(ierr);
401 
402   /* store nonzero values */
403   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
404   count = 0;
405   for (i=0; i<a->mbs; i++) {
406     for (j=0; j<bs; j++) {
407       for (k=a->i[i]; k<a->i[i+1]; k++) {
408         for (l=0; l<bs; l++) {
409           aa[count++] = a->a[bs2*k + l*bs + j];
410         }
411       }
412     }
413   }
414   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
415   ierr = PetscFree(aa);CHKERRQ(ierr);
416 
417   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
418   if (file) {
419     fprintf(file,"-matload_block_size %d\n",a->bs);
420   }
421   PetscFunctionReturn(0);
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
426 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
427 {
428   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
429   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
430   PetscViewerFormat format;
431 
432   PetscFunctionBegin;
433   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
434   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
435     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
436   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
437     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
438   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
439     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
440     for (i=0; i<a->mbs; i++) {
441       for (j=0; j<bs; j++) {
442         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
443         for (k=a->i[i]; k<a->i[i+1]; k++) {
444           for (l=0; l<bs; l++) {
445 #if defined(PETSC_USE_COMPLEX)
446             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
447               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l,
448                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
449             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
450               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l,
451                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
452             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
453               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
454             }
455 #else
456             if (a->a[bs2*k + l*bs + j] != 0.0) {
457               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
458             }
459 #endif
460           }
461         }
462         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
463       }
464     }
465     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
466   } else {
467     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
468     for (i=0; i<a->mbs; i++) {
469       for (j=0; j<bs; j++) {
470         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
471         for (k=a->i[i]; k<a->i[i+1]; k++) {
472           for (l=0; l<bs; l++) {
473 #if defined(PETSC_USE_COMPLEX)
474             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
475               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
476                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
477             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
478               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
479                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
480             } else {
481               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
482             }
483 #else
484             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
485 #endif
486           }
487         }
488         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
489       }
490     }
491     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
492   }
493   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
494   PetscFunctionReturn(0);
495 }
496 
497 #undef __FUNCT__
498 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
499 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
500 {
501   Mat          A = (Mat) Aa;
502   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
503   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
504   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
505   MatScalar    *aa;
506   PetscViewer  viewer;
507 
508   PetscFunctionBegin;
509 
510   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
511   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
512 
513   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
514 
515   /* loop over matrix elements drawing boxes */
516   color = PETSC_DRAW_BLUE;
517   for (i=0,row=0; i<mbs; i++,row+=bs) {
518     for (j=a->i[i]; j<a->i[i+1]; j++) {
519       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
520       x_l = a->j[j]*bs; x_r = x_l + 1.0;
521       aa = a->a + j*bs2;
522       for (k=0; k<bs; k++) {
523         for (l=0; l<bs; l++) {
524           if (PetscRealPart(*aa++) >=  0.) continue;
525           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
526         }
527       }
528     }
529   }
530   color = PETSC_DRAW_CYAN;
531   for (i=0,row=0; i<mbs; i++,row+=bs) {
532     for (j=a->i[i]; j<a->i[i+1]; j++) {
533       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
534       x_l = a->j[j]*bs; x_r = x_l + 1.0;
535       aa = a->a + j*bs2;
536       for (k=0; k<bs; k++) {
537         for (l=0; l<bs; l++) {
538           if (PetscRealPart(*aa++) != 0.) continue;
539           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
540         }
541       }
542     }
543   }
544 
545   color = PETSC_DRAW_RED;
546   for (i=0,row=0; i<mbs; i++,row+=bs) {
547     for (j=a->i[i]; j<a->i[i+1]; j++) {
548       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
549       x_l = a->j[j]*bs; x_r = x_l + 1.0;
550       aa = a->a + j*bs2;
551       for (k=0; k<bs; k++) {
552         for (l=0; l<bs; l++) {
553           if (PetscRealPart(*aa++) <= 0.) continue;
554           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
555         }
556       }
557     }
558   }
559   PetscFunctionReturn(0);
560 }
561 
562 #undef __FUNCT__
563 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
564 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
565 {
566   int          ierr;
567   PetscReal    xl,yl,xr,yr,w,h;
568   PetscDraw    draw;
569   PetscTruth   isnull;
570 
571   PetscFunctionBegin;
572 
573   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
574   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
575 
576   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
577   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
578   xr += w;    yr += h;  xl = -w;     yl = -h;
579   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
580   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
581   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
582   PetscFunctionReturn(0);
583 }
584 
585 #undef __FUNCT__
586 #define __FUNCT__ "MatView_SeqBAIJ"
587 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
588 {
589   int        ierr;
590   PetscTruth issocket,isascii,isbinary,isdraw;
591 
592   PetscFunctionBegin;
593   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
594   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
595   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
596   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
597   if (issocket) {
598     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
599   } else if (isascii){
600     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
601   } else if (isbinary) {
602     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
603   } else if (isdraw) {
604     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
605   } else {
606     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
607   }
608   PetscFunctionReturn(0);
609 }
610 
611 
612 #undef __FUNCT__
613 #define __FUNCT__ "MatGetValues_SeqBAIJ"
614 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
615 {
616   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
617   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
618   int        *ai = a->i,*ailen = a->ilen;
619   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
620   MatScalar  *ap,*aa = a->a,zero = 0.0;
621 
622   PetscFunctionBegin;
623   for (k=0; k<m; k++) { /* loop over rows */
624     row  = im[k]; brow = row/bs;
625     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
626     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
627     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
628     nrow = ailen[brow];
629     for (l=0; l<n; l++) { /* loop over columns */
630       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
631       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
632       col  = in[l] ;
633       bcol = col/bs;
634       cidx = col%bs;
635       ridx = row%bs;
636       high = nrow;
637       low  = 0; /* assume unsorted */
638       while (high-low > 5) {
639         t = (low+high)/2;
640         if (rp[t] > bcol) high = t;
641         else             low  = t;
642       }
643       for (i=low; i<high; i++) {
644         if (rp[i] > bcol) break;
645         if (rp[i] == bcol) {
646           *v++ = ap[bs2*i+bs*cidx+ridx];
647           goto finished;
648         }
649       }
650       *v++ = zero;
651       finished:;
652     }
653   }
654   PetscFunctionReturn(0);
655 }
656 
657 #if defined(PETSC_USE_MAT_SINGLE)
658 #undef __FUNCT__
659 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
660 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
661 {
662   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
663   int         ierr,i,N = m*n*b->bs2;
664   MatScalar   *vsingle;
665 
666   PetscFunctionBegin;
667   if (N > b->setvalueslen) {
668     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
669     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
670     b->setvalueslen  = N;
671   }
672   vsingle = b->setvaluescopy;
673   for (i=0; i<N; i++) {
674     vsingle[i] = v[i];
675   }
676   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 #endif
680 
681 
682 #undef __FUNCT__
683 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
684 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
685 {
686   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
687   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
688   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
689   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
690   PetscTruth  roworiented=a->roworiented;
691   MatScalar   *value = v,*ap,*aa = a->a,*bap;
692 
693   PetscFunctionBegin;
694   if (roworiented) {
695     stepval = (n-1)*bs;
696   } else {
697     stepval = (m-1)*bs;
698   }
699   for (k=0; k<m; k++) { /* loop over added rows */
700     row  = im[k];
701     if (row < 0) continue;
702 #if defined(PETSC_USE_BOPT_g)
703     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
704 #endif
705     rp   = aj + ai[row];
706     ap   = aa + bs2*ai[row];
707     rmax = imax[row];
708     nrow = ailen[row];
709     low  = 0;
710     for (l=0; l<n; l++) { /* loop over added columns */
711       if (in[l] < 0) continue;
712 #if defined(PETSC_USE_BOPT_g)
713       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
714 #endif
715       col = in[l];
716       if (roworiented) {
717         value = v + k*(stepval+bs)*bs + l*bs;
718       } else {
719         value = v + l*(stepval+bs)*bs + k*bs;
720       }
721       if (!sorted) low = 0; high = nrow;
722       while (high-low > 7) {
723         t = (low+high)/2;
724         if (rp[t] > col) high = t;
725         else             low  = t;
726       }
727       for (i=low; i<high; i++) {
728         if (rp[i] > col) break;
729         if (rp[i] == col) {
730           bap  = ap +  bs2*i;
731           if (roworiented) {
732             if (is == ADD_VALUES) {
733               for (ii=0; ii<bs; ii++,value+=stepval) {
734                 for (jj=ii; jj<bs2; jj+=bs) {
735                   bap[jj] += *value++;
736                 }
737               }
738             } else {
739               for (ii=0; ii<bs; ii++,value+=stepval) {
740                 for (jj=ii; jj<bs2; jj+=bs) {
741                   bap[jj] = *value++;
742                 }
743               }
744             }
745           } else {
746             if (is == ADD_VALUES) {
747               for (ii=0; ii<bs; ii++,value+=stepval) {
748                 for (jj=0; jj<bs; jj++) {
749                   *bap++ += *value++;
750                 }
751               }
752             } else {
753               for (ii=0; ii<bs; ii++,value+=stepval) {
754                 for (jj=0; jj<bs; jj++) {
755                   *bap++  = *value++;
756                 }
757               }
758             }
759           }
760           goto noinsert2;
761         }
762       }
763       if (nonew == 1) goto noinsert2;
764       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
765       if (nrow >= rmax) {
766         /* there is no extra room in row, therefore enlarge */
767         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
768         MatScalar *new_a;
769 
770         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
771 
772         /* malloc new storage space */
773         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
774 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
775         new_j   = (int*)(new_a + bs2*new_nz);
776         new_i   = new_j + new_nz;
777 
778         /* copy over old data into new slots */
779         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
780         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
781         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
782         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
783         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
784         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
785         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
786         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
787         /* free up old matrix storage */
788         ierr = PetscFree(a->a);CHKERRQ(ierr);
789         if (!a->singlemalloc) {
790           ierr = PetscFree(a->i);CHKERRQ(ierr);
791           ierr = PetscFree(a->j);CHKERRQ(ierr);
792         }
793         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
794         a->singlemalloc = PETSC_TRUE;
795 
796         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
797         rmax = imax[row] = imax[row] + CHUNKSIZE;
798         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
799         a->maxnz += bs2*CHUNKSIZE;
800         a->reallocs++;
801         a->nz++;
802       }
803       N = nrow++ - 1;
804       /* shift up all the later entries in this row */
805       for (ii=N; ii>=i; ii--) {
806         rp[ii+1] = rp[ii];
807         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
808       }
809       if (N >= i) {
810         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
811       }
812       rp[i] = col;
813       bap   = ap +  bs2*i;
814       if (roworiented) {
815         for (ii=0; ii<bs; ii++,value+=stepval) {
816           for (jj=ii; jj<bs2; jj+=bs) {
817             bap[jj] = *value++;
818           }
819         }
820       } else {
821         for (ii=0; ii<bs; ii++,value+=stepval) {
822           for (jj=0; jj<bs; jj++) {
823             *bap++  = *value++;
824           }
825         }
826       }
827       noinsert2:;
828       low = i;
829     }
830     ailen[row] = nrow;
831   }
832   PetscFunctionReturn(0);
833 }
834 
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
838 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
839 {
840   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
841   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
842   int        m = A->m,*ip,N,*ailen = a->ilen;
843   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
844   MatScalar  *aa = a->a,*ap;
845 
846   PetscFunctionBegin;
847   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
848 
849   if (m) rmax = ailen[0];
850   for (i=1; i<mbs; i++) {
851     /* move each row back by the amount of empty slots (fshift) before it*/
852     fshift += imax[i-1] - ailen[i-1];
853     rmax   = PetscMax(rmax,ailen[i]);
854     if (fshift) {
855       ip = aj + ai[i]; ap = aa + bs2*ai[i];
856       N = ailen[i];
857       for (j=0; j<N; j++) {
858         ip[j-fshift] = ip[j];
859         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
860       }
861     }
862     ai[i] = ai[i-1] + ailen[i-1];
863   }
864   if (mbs) {
865     fshift += imax[mbs-1] - ailen[mbs-1];
866     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
867   }
868   /* reset ilen and imax for each row */
869   for (i=0; i<mbs; i++) {
870     ailen[i] = imax[i] = ai[i+1] - ai[i];
871   }
872   a->nz = ai[mbs];
873 
874   /* diagonals may have moved, so kill the diagonal pointers */
875   if (fshift && a->diag) {
876     ierr = PetscFree(a->diag);CHKERRQ(ierr);
877     PetscLogObjectMemory(A,-(m+1)*sizeof(int));
878     a->diag = 0;
879   }
880   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
881   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
882   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
883   a->reallocs          = 0;
884   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
885 
886   PetscFunctionReturn(0);
887 }
888 
889 
890 
891 /*
892    This function returns an array of flags which indicate the locations of contiguous
893    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
894    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
895    Assume: sizes should be long enough to hold all the values.
896 */
897 #undef __FUNCT__
898 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
899 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
900 {
901   int        i,j,k,row;
902   PetscTruth flg;
903 
904   PetscFunctionBegin;
905   for (i=0,j=0; i<n; j++) {
906     row = idx[i];
907     if (row%bs!=0) { /* Not the begining of a block */
908       sizes[j] = 1;
909       i++;
910     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
911       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
912       i++;
913     } else { /* Begining of the block, so check if the complete block exists */
914       flg = PETSC_TRUE;
915       for (k=1; k<bs; k++) {
916         if (row+k != idx[i+k]) { /* break in the block */
917           flg = PETSC_FALSE;
918           break;
919         }
920       }
921       if (flg == PETSC_TRUE) { /* No break in the bs */
922         sizes[j] = bs;
923         i+= bs;
924       } else {
925         sizes[j] = 1;
926         i++;
927       }
928     }
929   }
930   *bs_max = j;
931   PetscFunctionReturn(0);
932 }
933 
934 #undef __FUNCT__
935 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
936 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag)
937 {
938   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
939   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
940   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
941   PetscScalar zero = 0.0;
942   MatScalar   *aa;
943 
944   PetscFunctionBegin;
945   /* Make a copy of the IS and  sort it */
946   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
947   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
948 
949   /* allocate memory for rows,sizes */
950   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
951   sizes = rows + is_n;
952 
953   /* copy IS values to rows, and sort them */
954   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
955   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
956   if (baij->keepzeroedrows) {
957     for (i=0; i<is_n; i++) { sizes[i] = 1; }
958     bs_max = is_n;
959   } else {
960     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
961   }
962   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
963 
964   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
965     row   = rows[j];
966     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
967     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
968     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
969     if (sizes[i] == bs && !baij->keepzeroedrows) {
970       if (diag) {
971         if (baij->ilen[row/bs] > 0) {
972           baij->ilen[row/bs]       = 1;
973           baij->j[baij->i[row/bs]] = row/bs;
974           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
975         }
976         /* Now insert all the diagonal values for this bs */
977         for (k=0; k<bs; k++) {
978           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
979         }
980       } else { /* (!diag) */
981         baij->ilen[row/bs] = 0;
982       } /* end (!diag) */
983     } else { /* (sizes[i] != bs) */
984 #if defined (PETSC_USE_DEBUG)
985       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
986 #endif
987       for (k=0; k<count; k++) {
988         aa[0] =  zero;
989         aa    += bs;
990       }
991       if (diag) {
992         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
993       }
994     }
995   }
996 
997   ierr = PetscFree(rows);CHKERRQ(ierr);
998   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
1001 
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1004 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
1005 {
1006   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1007   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1008   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1009   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1010   int         ridx,cidx,bs2=a->bs2,ierr;
1011   PetscTruth  roworiented=a->roworiented;
1012   MatScalar   *ap,value,*aa=a->a,*bap;
1013 
1014   PetscFunctionBegin;
1015   for (k=0; k<m; k++) { /* loop over added rows */
1016     row  = im[k]; brow = row/bs;
1017     if (row < 0) continue;
1018 #if defined(PETSC_USE_BOPT_g)
1019     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1020 #endif
1021     rp   = aj + ai[brow];
1022     ap   = aa + bs2*ai[brow];
1023     rmax = imax[brow];
1024     nrow = ailen[brow];
1025     low  = 0;
1026     for (l=0; l<n; l++) { /* loop over added columns */
1027       if (in[l] < 0) continue;
1028 #if defined(PETSC_USE_BOPT_g)
1029       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1030 #endif
1031       col = in[l]; bcol = col/bs;
1032       ridx = row % bs; cidx = col % bs;
1033       if (roworiented) {
1034         value = v[l + k*n];
1035       } else {
1036         value = v[k + l*m];
1037       }
1038       if (!sorted) low = 0; high = nrow;
1039       while (high-low > 7) {
1040         t = (low+high)/2;
1041         if (rp[t] > bcol) high = t;
1042         else              low  = t;
1043       }
1044       for (i=low; i<high; i++) {
1045         if (rp[i] > bcol) break;
1046         if (rp[i] == bcol) {
1047           bap  = ap +  bs2*i + bs*cidx + ridx;
1048           if (is == ADD_VALUES) *bap += value;
1049           else                  *bap  = value;
1050           goto noinsert1;
1051         }
1052       }
1053       if (nonew == 1) goto noinsert1;
1054       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1055       if (nrow >= rmax) {
1056         /* there is no extra room in row, therefore enlarge */
1057         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1058         MatScalar *new_a;
1059 
1060         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1061 
1062         /* Malloc new storage space */
1063         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1064 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1065         new_j   = (int*)(new_a + bs2*new_nz);
1066         new_i   = new_j + new_nz;
1067 
1068         /* copy over old data into new slots */
1069         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1070         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1071         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1072         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1073         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1074         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1075         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1076         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1077         /* free up old matrix storage */
1078         ierr = PetscFree(a->a);CHKERRQ(ierr);
1079         if (!a->singlemalloc) {
1080           ierr = PetscFree(a->i);CHKERRQ(ierr);
1081           ierr = PetscFree(a->j);CHKERRQ(ierr);
1082         }
1083         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1084         a->singlemalloc = PETSC_TRUE;
1085 
1086         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1087         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1088         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1089         a->maxnz += bs2*CHUNKSIZE;
1090         a->reallocs++;
1091         a->nz++;
1092       }
1093       N = nrow++ - 1;
1094       /* shift up all the later entries in this row */
1095       for (ii=N; ii>=i; ii--) {
1096         rp[ii+1] = rp[ii];
1097         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1098       }
1099       if (N>=i) {
1100         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1101       }
1102       rp[i]                      = bcol;
1103       ap[bs2*i + bs*cidx + ridx] = value;
1104       noinsert1:;
1105       low = i;
1106     }
1107     ailen[brow] = nrow;
1108   }
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1115 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1116 {
1117   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1118   Mat         outA;
1119   int         ierr;
1120   PetscTruth  row_identity,col_identity;
1121 
1122   PetscFunctionBegin;
1123   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1124   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1125   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1126   if (!row_identity || !col_identity) {
1127     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1128   }
1129 
1130   outA          = inA;
1131   inA->factor   = FACTOR_LU;
1132 
1133   if (!a->diag) {
1134     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1135   }
1136 
1137   a->row        = row;
1138   a->col        = col;
1139   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1140   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1141 
1142   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1143   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1144   PetscLogObjectParent(inA,a->icol);
1145 
1146   /*
1147       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1148       for ILU(0) factorization with natural ordering
1149   */
1150   if (a->bs < 8) {
1151     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1152   } else {
1153     if (!a->solve_work) {
1154       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1155       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1156     }
1157   }
1158 
1159   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1160 
1161   PetscFunctionReturn(0);
1162 }
1163 #undef __FUNCT__
1164 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1165 int MatPrintHelp_SeqBAIJ(Mat A)
1166 {
1167   static PetscTruth called = PETSC_FALSE;
1168   MPI_Comm          comm = A->comm;
1169   int               ierr;
1170 
1171   PetscFunctionBegin;
1172   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1173   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1174   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 EXTERN_C_BEGIN
1179 #undef __FUNCT__
1180 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1181 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1182 {
1183   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1184   int         i,nz,nbs;
1185 
1186   PetscFunctionBegin;
1187   nz  = baij->maxnz/baij->bs2;
1188   nbs = baij->nbs;
1189   for (i=0; i<nz; i++) {
1190     baij->j[i] = indices[i];
1191   }
1192   baij->nz = nz;
1193   for (i=0; i<nbs; i++) {
1194     baij->ilen[i] = baij->imax[i];
1195   }
1196 
1197   PetscFunctionReturn(0);
1198 }
1199 EXTERN_C_END
1200 
1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1203 /*@
1204     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1205        in the matrix.
1206 
1207   Input Parameters:
1208 +  mat - the SeqBAIJ matrix
1209 -  indices - the column indices
1210 
1211   Level: advanced
1212 
1213   Notes:
1214     This can be called if you have precomputed the nonzero structure of the
1215   matrix and want to provide it to the matrix object to improve the performance
1216   of the MatSetValues() operation.
1217 
1218     You MUST have set the correct numbers of nonzeros per row in the call to
1219   MatCreateSeqBAIJ().
1220 
1221     MUST be called before any calls to MatSetValues();
1222 
1223 @*/
1224 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1225 {
1226   int ierr,(*f)(Mat,int *);
1227 
1228   PetscFunctionBegin;
1229   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1230   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1231   if (f) {
1232     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1233   } else {
1234     SETERRQ(1,"Wrong type of matrix to set column indices");
1235   }
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 #undef __FUNCT__
1240 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1241 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1242 {
1243   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1244   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1245   PetscReal    atmp;
1246   PetscScalar  *x,zero = 0.0;
1247   MatScalar    *aa;
1248   int          ncols,brow,krow,kcol;
1249 
1250   PetscFunctionBegin;
1251   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1252   bs   = a->bs;
1253   aa   = a->a;
1254   ai   = a->i;
1255   aj   = a->j;
1256   mbs = a->mbs;
1257 
1258   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1259   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1260   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1261   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1262   for (i=0; i<mbs; i++) {
1263     ncols = ai[1] - ai[0]; ai++;
1264     brow  = bs*i;
1265     for (j=0; j<ncols; j++){
1266       /* bcol = bs*(*aj); */
1267       for (kcol=0; kcol<bs; kcol++){
1268         for (krow=0; krow<bs; krow++){
1269           atmp = PetscAbsScalar(*aa); aa++;
1270           row = brow + krow;    /* row index */
1271           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1272           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1273         }
1274       }
1275       aj++;
1276     }
1277   }
1278   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 #undef __FUNCT__
1283 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1284 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1285 {
1286   int        ierr;
1287 
1288   PetscFunctionBegin;
1289   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 #undef __FUNCT__
1294 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1295 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array)
1296 {
1297   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1298   PetscFunctionBegin;
1299   *array = a->a;
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1305 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array)
1306 {
1307   PetscFunctionBegin;
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 /* -------------------------------------------------------------------*/
1312 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1313        MatGetRow_SeqBAIJ,
1314        MatRestoreRow_SeqBAIJ,
1315        MatMult_SeqBAIJ_N,
1316        MatMultAdd_SeqBAIJ_N,
1317        MatMultTranspose_SeqBAIJ,
1318        MatMultTransposeAdd_SeqBAIJ,
1319        MatSolve_SeqBAIJ_N,
1320        0,
1321        0,
1322        0,
1323        MatLUFactor_SeqBAIJ,
1324        0,
1325        0,
1326        MatTranspose_SeqBAIJ,
1327        MatGetInfo_SeqBAIJ,
1328        MatEqual_SeqBAIJ,
1329        MatGetDiagonal_SeqBAIJ,
1330        MatDiagonalScale_SeqBAIJ,
1331        MatNorm_SeqBAIJ,
1332        0,
1333        MatAssemblyEnd_SeqBAIJ,
1334        0,
1335        MatSetOption_SeqBAIJ,
1336        MatZeroEntries_SeqBAIJ,
1337        MatZeroRows_SeqBAIJ,
1338        MatLUFactorSymbolic_SeqBAIJ,
1339        MatLUFactorNumeric_SeqBAIJ_N,
1340        0,
1341        0,
1342        MatSetUpPreallocation_SeqBAIJ,
1343        MatILUFactorSymbolic_SeqBAIJ,
1344        0,
1345        MatGetArray_SeqBAIJ,
1346        MatRestoreArray_SeqBAIJ,
1347        MatDuplicate_SeqBAIJ,
1348        0,
1349        0,
1350        MatILUFactor_SeqBAIJ,
1351        0,
1352        0,
1353        MatGetSubMatrices_SeqBAIJ,
1354        MatIncreaseOverlap_SeqBAIJ,
1355        MatGetValues_SeqBAIJ,
1356        0,
1357        MatPrintHelp_SeqBAIJ,
1358        MatScale_SeqBAIJ,
1359        0,
1360        0,
1361        0,
1362        MatGetBlockSize_SeqBAIJ,
1363        MatGetRowIJ_SeqBAIJ,
1364        MatRestoreRowIJ_SeqBAIJ,
1365        0,
1366        0,
1367        0,
1368        0,
1369        0,
1370        0,
1371        MatSetValuesBlocked_SeqBAIJ,
1372        MatGetSubMatrix_SeqBAIJ,
1373        MatDestroy_SeqBAIJ,
1374        MatView_SeqBAIJ,
1375        MatGetPetscMaps_Petsc,
1376        0,
1377        0,
1378        0,
1379        0,
1380        0,
1381        0,
1382        MatGetRowMax_SeqBAIJ,
1383        MatConvert_Basic};
1384 
1385 EXTERN_C_BEGIN
1386 #undef __FUNCT__
1387 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1388 int MatStoreValues_SeqBAIJ(Mat mat)
1389 {
1390   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1391   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1392   int         ierr;
1393 
1394   PetscFunctionBegin;
1395   if (aij->nonew != 1) {
1396     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1397   }
1398 
1399   /* allocate space for values if not already there */
1400   if (!aij->saved_values) {
1401     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1402   }
1403 
1404   /* copy values over */
1405   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1406   PetscFunctionReturn(0);
1407 }
1408 EXTERN_C_END
1409 
1410 EXTERN_C_BEGIN
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1413 int MatRetrieveValues_SeqBAIJ(Mat mat)
1414 {
1415   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1416   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1417 
1418   PetscFunctionBegin;
1419   if (aij->nonew != 1) {
1420     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1421   }
1422   if (!aij->saved_values) {
1423     SETERRQ(1,"Must call MatStoreValues(A);first");
1424   }
1425 
1426   /* copy values over */
1427   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1428   PetscFunctionReturn(0);
1429 }
1430 EXTERN_C_END
1431 
1432 EXTERN_C_BEGIN
1433 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1434 EXTERN_C_END
1435 
1436 EXTERN_C_BEGIN
1437 #undef __FUNCT__
1438 #define __FUNCT__ "MatCreate_SeqBAIJ"
1439 int MatCreate_SeqBAIJ(Mat B)
1440 {
1441   int         ierr,size;
1442   Mat_SeqBAIJ *b;
1443 
1444   PetscFunctionBegin;
1445   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1446   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1447 
1448   B->m = B->M = PetscMax(B->m,B->M);
1449   B->n = B->N = PetscMax(B->n,B->N);
1450   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1451   B->data = (void*)b;
1452   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1453   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1454   B->factor           = 0;
1455   B->lupivotthreshold = 1.0;
1456   B->mapping          = 0;
1457   b->row              = 0;
1458   b->col              = 0;
1459   b->icol             = 0;
1460   b->reallocs         = 0;
1461   b->saved_values     = 0;
1462 #if defined(PETSC_USE_MAT_SINGLE)
1463   b->setvalueslen     = 0;
1464   b->setvaluescopy    = PETSC_NULL;
1465 #endif
1466   b->single_precision_solves = PETSC_FALSE;
1467 
1468   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1469   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1470 
1471   b->sorted           = PETSC_FALSE;
1472   b->roworiented      = PETSC_TRUE;
1473   b->nonew            = 0;
1474   b->diag             = 0;
1475   b->solve_work       = 0;
1476   b->mult_work        = 0;
1477   b->spptr            = 0;
1478   B->info.nz_unneeded = (PetscReal)b->maxnz;
1479   b->keepzeroedrows   = PETSC_FALSE;
1480 
1481   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1482                                      "MatStoreValues_SeqBAIJ",
1483                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1484   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1485                                      "MatRetrieveValues_SeqBAIJ",
1486                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1487   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1488                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1489                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1490   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1491                                      "MatConvert_SeqBAIJ_SeqAIJ",
1492                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1493   PetscFunctionReturn(0);
1494 }
1495 EXTERN_C_END
1496 
1497 #undef __FUNCT__
1498 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1499 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1500 {
1501   Mat         C;
1502   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1503   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1504 
1505   PetscFunctionBegin;
1506   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1507 
1508   *B = 0;
1509   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1510   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1511   c    = (Mat_SeqBAIJ*)C->data;
1512 
1513   c->bs         = a->bs;
1514   c->bs2        = a->bs2;
1515   c->mbs        = a->mbs;
1516   c->nbs        = a->nbs;
1517   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1518 
1519   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1520   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1521   for (i=0; i<mbs; i++) {
1522     c->imax[i] = a->imax[i];
1523     c->ilen[i] = a->ilen[i];
1524   }
1525 
1526   /* allocate the matrix space */
1527   c->singlemalloc = PETSC_TRUE;
1528   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1529   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1530   c->j = (int*)(c->a + nz*bs2);
1531   c->i = c->j + nz;
1532   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1533   if (mbs > 0) {
1534     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1535     if (cpvalues == MAT_COPY_VALUES) {
1536       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1537     } else {
1538       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1539     }
1540   }
1541 
1542   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1543   c->sorted      = a->sorted;
1544   c->roworiented = a->roworiented;
1545   c->nonew       = a->nonew;
1546 
1547   if (a->diag) {
1548     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1549     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1550     for (i=0; i<mbs; i++) {
1551       c->diag[i] = a->diag[i];
1552     }
1553   } else c->diag        = 0;
1554   c->nz                 = a->nz;
1555   c->maxnz              = a->maxnz;
1556   c->solve_work         = 0;
1557   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1558   c->mult_work          = 0;
1559   C->preallocated       = PETSC_TRUE;
1560   C->assembled          = PETSC_TRUE;
1561   *B = C;
1562   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 EXTERN_C_BEGIN
1567 #undef __FUNCT__
1568 #define __FUNCT__ "MatLoad_SeqBAIJ"
1569 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1570 {
1571   Mat_SeqBAIJ  *a;
1572   Mat          B;
1573   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1574   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1575   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1576   int          *masked,nmask,tmp,bs2,ishift;
1577   PetscScalar  *aa;
1578   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1579 
1580   PetscFunctionBegin;
1581   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1582   bs2  = bs*bs;
1583 
1584   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1585   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1586   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1587   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1588   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1589   M = header[1]; N = header[2]; nz = header[3];
1590 
1591   if (header[3] < 0) {
1592     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1593   }
1594 
1595   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1596 
1597   /*
1598      This code adds extra rows to make sure the number of rows is
1599     divisible by the blocksize
1600   */
1601   mbs        = M/bs;
1602   extra_rows = bs - M + bs*(mbs);
1603   if (extra_rows == bs) extra_rows = 0;
1604   else                  mbs++;
1605   if (extra_rows) {
1606     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1607   }
1608 
1609   /* read in row lengths */
1610   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1611   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1612   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1613 
1614   /* read in column indices */
1615   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1616   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1617   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1618 
1619   /* loop over row lengths determining block row lengths */
1620   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1621   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1622   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1623   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1624   masked   = mask + mbs;
1625   rowcount = 0; nzcount = 0;
1626   for (i=0; i<mbs; i++) {
1627     nmask = 0;
1628     for (j=0; j<bs; j++) {
1629       kmax = rowlengths[rowcount];
1630       for (k=0; k<kmax; k++) {
1631         tmp = jj[nzcount++]/bs;
1632         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1633       }
1634       rowcount++;
1635     }
1636     browlengths[i] += nmask;
1637     /* zero out the mask elements we set */
1638     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1639   }
1640 
1641   /* create our matrix */
1642   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1643   B = *A;
1644   a = (Mat_SeqBAIJ*)B->data;
1645 
1646   /* set matrix "i" values */
1647   a->i[0] = 0;
1648   for (i=1; i<= mbs; i++) {
1649     a->i[i]      = a->i[i-1] + browlengths[i-1];
1650     a->ilen[i-1] = browlengths[i-1];
1651   }
1652   a->nz         = 0;
1653   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1654 
1655   /* read in nonzero values */
1656   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1657   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1658   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1659 
1660   /* set "a" and "j" values into matrix */
1661   nzcount = 0; jcount = 0;
1662   for (i=0; i<mbs; i++) {
1663     nzcountb = nzcount;
1664     nmask    = 0;
1665     for (j=0; j<bs; j++) {
1666       kmax = rowlengths[i*bs+j];
1667       for (k=0; k<kmax; k++) {
1668         tmp = jj[nzcount++]/bs;
1669 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1670       }
1671     }
1672     /* sort the masked values */
1673     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1674 
1675     /* set "j" values into matrix */
1676     maskcount = 1;
1677     for (j=0; j<nmask; j++) {
1678       a->j[jcount++]  = masked[j];
1679       mask[masked[j]] = maskcount++;
1680     }
1681     /* set "a" values into matrix */
1682     ishift = bs2*a->i[i];
1683     for (j=0; j<bs; j++) {
1684       kmax = rowlengths[i*bs+j];
1685       for (k=0; k<kmax; k++) {
1686         tmp       = jj[nzcountb]/bs ;
1687         block     = mask[tmp] - 1;
1688         point     = jj[nzcountb] - bs*tmp;
1689         idx       = ishift + bs2*block + j + bs*point;
1690         a->a[idx] = (MatScalar)aa[nzcountb++];
1691       }
1692     }
1693     /* zero out the mask elements we set */
1694     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1695   }
1696   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1697 
1698   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1699   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1700   ierr = PetscFree(aa);CHKERRQ(ierr);
1701   ierr = PetscFree(jj);CHKERRQ(ierr);
1702   ierr = PetscFree(mask);CHKERRQ(ierr);
1703 
1704   B->assembled = PETSC_TRUE;
1705 
1706   ierr = MatView_Private(B);CHKERRQ(ierr);
1707   PetscFunctionReturn(0);
1708 }
1709 EXTERN_C_END
1710 
1711 #undef __FUNCT__
1712 #define __FUNCT__ "MatCreateSeqBAIJ"
1713 /*@C
1714    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1715    compressed row) format.  For good matrix assembly performance the
1716    user should preallocate the matrix storage by setting the parameter nz
1717    (or the array nnz).  By setting these parameters accurately, performance
1718    during matrix assembly can be increased by more than a factor of 50.
1719 
1720    Collective on MPI_Comm
1721 
1722    Input Parameters:
1723 +  comm - MPI communicator, set to PETSC_COMM_SELF
1724 .  bs - size of block
1725 .  m - number of rows
1726 .  n - number of columns
1727 .  nz - number of nonzero blocks  per block row (same for all rows)
1728 -  nnz - array containing the number of nonzero blocks in the various block rows
1729          (possibly different for each block row) or PETSC_NULL
1730 
1731    Output Parameter:
1732 .  A - the matrix
1733 
1734    Options Database Keys:
1735 .   -mat_no_unroll - uses code that does not unroll the loops in the
1736                      block calculations (much slower)
1737 .    -mat_block_size - size of the blocks to use
1738 
1739    Level: intermediate
1740 
1741    Notes:
1742    A nonzero block is any block that as 1 or more nonzeros in it
1743 
1744    The block AIJ format is fully compatible with standard Fortran 77
1745    storage.  That is, the stored row and column indices can begin at
1746    either one (as in Fortran) or zero.  See the users' manual for details.
1747 
1748    Specify the preallocated storage with either nz or nnz (not both).
1749    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1750    allocation.  For additional details, see the users manual chapter on
1751    matrices.
1752 
1753 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1754 @*/
1755 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1756 {
1757   int ierr;
1758 
1759   PetscFunctionBegin;
1760   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1761   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
1762   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1763   PetscFunctionReturn(0);
1764 }
1765 
1766 #undef __FUNCT__
1767 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
1768 /*@C
1769    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1770    per row in the matrix. For good matrix assembly performance the
1771    user should preallocate the matrix storage by setting the parameter nz
1772    (or the array nnz).  By setting these parameters accurately, performance
1773    during matrix assembly can be increased by more than a factor of 50.
1774 
1775    Collective on MPI_Comm
1776 
1777    Input Parameters:
1778 +  A - the matrix
1779 .  bs - size of block
1780 .  nz - number of block nonzeros per block row (same for all rows)
1781 -  nnz - array containing the number of block nonzeros in the various block rows
1782          (possibly different for each block row) or PETSC_NULL
1783 
1784    Options Database Keys:
1785 .   -mat_no_unroll - uses code that does not unroll the loops in the
1786                      block calculations (much slower)
1787 .    -mat_block_size - size of the blocks to use
1788 
1789    Level: intermediate
1790 
1791    Notes:
1792    The block AIJ format is fully compatible with standard Fortran 77
1793    storage.  That is, the stored row and column indices can begin at
1794    either one (as in Fortran) or zero.  See the users' manual for details.
1795 
1796    Specify the preallocated storage with either nz or nnz (not both).
1797    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1798    allocation.  For additional details, see the users manual chapter on
1799    matrices.
1800 
1801 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1802 @*/
1803 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1804 {
1805   Mat_SeqBAIJ *b;
1806   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1807   PetscTruth  flg;
1808 
1809   PetscFunctionBegin;
1810   ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr);
1811   if (!flg) PetscFunctionReturn(0);
1812 
1813   B->preallocated = PETSC_TRUE;
1814   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1815   if (nnz && newbs != bs) {
1816     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1817   }
1818   bs   = newbs;
1819 
1820   mbs  = B->m/bs;
1821   nbs  = B->n/bs;
1822   bs2  = bs*bs;
1823 
1824   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1825     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1826   }
1827 
1828   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1829   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1830   if (nnz) {
1831     for (i=0; i<mbs; i++) {
1832       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1833       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1834     }
1835   }
1836 
1837   b       = (Mat_SeqBAIJ*)B->data;
1838   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1839   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1840   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1841   if (!flg) {
1842     switch (bs) {
1843     case 1:
1844       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1845       B->ops->mult            = MatMult_SeqBAIJ_1;
1846       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1847       break;
1848     case 2:
1849       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1850       B->ops->mult            = MatMult_SeqBAIJ_2;
1851       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1852       break;
1853     case 3:
1854       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1855       B->ops->mult            = MatMult_SeqBAIJ_3;
1856       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1857       break;
1858     case 4:
1859       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1860       B->ops->mult            = MatMult_SeqBAIJ_4;
1861       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1862       break;
1863     case 5:
1864       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1865       B->ops->mult            = MatMult_SeqBAIJ_5;
1866       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1867       break;
1868     case 6:
1869       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1870       B->ops->mult            = MatMult_SeqBAIJ_6;
1871       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1872       break;
1873     case 7:
1874       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1875       B->ops->mult            = MatMult_SeqBAIJ_7;
1876       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1877       break;
1878     default:
1879       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1880       B->ops->mult            = MatMult_SeqBAIJ_N;
1881       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1882       break;
1883     }
1884   }
1885   b->bs      = bs;
1886   b->mbs     = mbs;
1887   b->nbs     = nbs;
1888   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1889   if (!nnz) {
1890     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1891     else if (nz <= 0)        nz = 1;
1892     for (i=0; i<mbs; i++) b->imax[i] = nz;
1893     nz = nz*mbs;
1894   } else {
1895     nz = 0;
1896     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1897   }
1898 
1899   /* allocate the matrix space */
1900   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1901   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1902   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1903   b->j  = (int*)(b->a + nz*bs2);
1904   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1905   b->i  = b->j + nz;
1906   b->singlemalloc = PETSC_TRUE;
1907 
1908   b->i[0] = 0;
1909   for (i=1; i<mbs+1; i++) {
1910     b->i[i] = b->i[i-1] + b->imax[i-1];
1911   }
1912 
1913   /* b->ilen will count nonzeros in each block row so far. */
1914   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1915   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1916   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1917 
1918   b->bs               = bs;
1919   b->bs2              = bs2;
1920   b->mbs              = mbs;
1921   b->nz               = 0;
1922   b->maxnz            = nz*bs2;
1923   B->info.nz_unneeded = (PetscReal)b->maxnz;
1924   PetscFunctionReturn(0);
1925 }
1926 
1927