xref: /petsc/src/mat/impls/baij/seq/baij.c (revision e74ef6923b2c0251c8aab3a0671bbb2dfd6aaf97)
1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "petscsys.h"                     /*I "petscmat.h" I*/
8 #include "src/mat/impls/baij/seq/baij.h"
9 #include "src/vec/vecimpl.h"
10 #include "src/inline/spops.h"
11 
12 /*  UGLY, ugly, ugly
13    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
14    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
15    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
16    converts the entries into single precision and then calls ..._MatScalar() to put them
17    into the single precision data structures.
18 */
19 #if defined(PETSC_USE_MAT_SINGLE)
20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
21 #else
22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
23 #endif
24 
25 #define CHUNKSIZE  10
26 
27 /*
28      Checks for missing diagonals
29 */
30 #undef __FUNCT__
31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
32 int MatMissingDiagonal_SeqBAIJ(Mat A)
33 {
34   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
35   int         *diag,*jj = a->j,i,ierr;
36 
37   PetscFunctionBegin;
38   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
39   diag = a->diag;
40   for (i=0; i<a->mbs; i++) {
41     if (jj[diag[i]] != i) {
42       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
43     }
44   }
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
50 int MatMarkDiagonal_SeqBAIJ(Mat A)
51 {
52   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
53   int         i,j,*diag,m = a->mbs,ierr;
54 
55   PetscFunctionBegin;
56   if (a->diag) PetscFunctionReturn(0);
57 
58   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
59   PetscLogObjectMemory(A,(m+1)*sizeof(int));
60   for (i=0; i<m; i++) {
61     diag[i] = a->i[i+1];
62     for (j=a->i[i]; j<a->i[i+1]; j++) {
63       if (a->j[j] == i) {
64         diag[i] = j;
65         break;
66       }
67     }
68   }
69   a->diag = diag;
70   PetscFunctionReturn(0);
71 }
72 
73 
74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
79 {
80   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
81   int         ierr,n = a->mbs,i;
82 
83   PetscFunctionBegin;
84   *nn = n;
85   if (!ia) PetscFunctionReturn(0);
86   if (symmetric) {
87     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
88   } else if (oshift == 1) {
89     /* temporarily add 1 to i and j indices */
90     int nz = a->i[n];
91     for (i=0; i<nz; i++) a->j[i]++;
92     for (i=0; i<n+1; i++) a->i[i]++;
93     *ia = a->i; *ja = a->j;
94   } else {
95     *ia = a->i; *ja = a->j;
96   }
97 
98   PetscFunctionReturn(0);
99 }
100 
101 #undef __FUNCT__
102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
104 {
105   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
106   int         i,n = a->mbs,ierr;
107 
108   PetscFunctionBegin;
109   if (!ia) PetscFunctionReturn(0);
110   if (symmetric) {
111     ierr = PetscFree(*ia);CHKERRQ(ierr);
112     ierr = PetscFree(*ja);CHKERRQ(ierr);
113   } else if (oshift == 1) {
114     int nz = a->i[n]-1;
115     for (i=0; i<nz; i++) a->j[i]--;
116     for (i=0; i<n+1; i++) a->i[i]--;
117   }
118   PetscFunctionReturn(0);
119 }
120 
121 #undef __FUNCT__
122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
124 {
125   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
126 
127   PetscFunctionBegin;
128   *bs = baij->bs;
129   PetscFunctionReturn(0);
130 }
131 
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatDestroy_SeqBAIJ"
135 int MatDestroy_SeqBAIJ(Mat A)
136 {
137   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
138   int         ierr;
139 
140   PetscFunctionBegin;
141 #if defined(PETSC_USE_LOG)
142   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
143 #endif
144   ierr = PetscFree(a->a);CHKERRQ(ierr);
145   if (!a->singlemalloc) {
146     ierr = PetscFree(a->i);CHKERRQ(ierr);
147     ierr = PetscFree(a->j);CHKERRQ(ierr);
148   }
149   if (a->row) {
150     ierr = ISDestroy(a->row);CHKERRQ(ierr);
151   }
152   if (a->col) {
153     ierr = ISDestroy(a->col);CHKERRQ(ierr);
154   }
155   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
156   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
157   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
158   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
159   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
160   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
161   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
162 #if defined(PETSC_USE_MAT_SINGLE)
163   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
164 #endif
165   ierr = PetscFree(a);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "MatSetOption_SeqBAIJ"
171 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
172 {
173   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
174 
175   PetscFunctionBegin;
176   switch (op) {
177   case MAT_ROW_ORIENTED:
178     a->roworiented    = PETSC_TRUE;
179     break;
180   case MAT_COLUMN_ORIENTED:
181     a->roworiented    = PETSC_FALSE;
182     break;
183   case MAT_COLUMNS_SORTED:
184     a->sorted         = PETSC_TRUE;
185     break;
186   case MAT_COLUMNS_UNSORTED:
187     a->sorted         = PETSC_FALSE;
188     break;
189   case MAT_KEEP_ZEROED_ROWS:
190     a->keepzeroedrows = PETSC_TRUE;
191     break;
192   case MAT_NO_NEW_NONZERO_LOCATIONS:
193     a->nonew          = 1;
194     break;
195   case MAT_NEW_NONZERO_LOCATION_ERR:
196     a->nonew          = -1;
197     break;
198   case MAT_NEW_NONZERO_ALLOCATION_ERR:
199     a->nonew          = -2;
200     break;
201   case MAT_YES_NEW_NONZERO_LOCATIONS:
202     a->nonew          = 0;
203     break;
204   case MAT_ROWS_SORTED:
205   case MAT_ROWS_UNSORTED:
206   case MAT_YES_NEW_DIAGONALS:
207   case MAT_IGNORE_OFF_PROC_ENTRIES:
208   case MAT_USE_HASH_TABLE:
209     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
210     break;
211   case MAT_NO_NEW_DIAGONALS:
212     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
213     break;
214   case MAT_USE_SINGLE_PRECISION_SOLVES:
215     if (a->bs==4) {
216       PetscTruth sse_enabled_local,sse_enabled_global;
217       int        ierr;
218 
219       sse_enabled_local  = PETSC_FALSE;
220       sse_enabled_global = PETSC_FALSE;
221 
222       ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr);
223 #if defined(PETSC_HAVE_SSE)
224       if (sse_enabled_local) {
225           a->single_precision_solves = PETSC_TRUE;
226           A->ops->solve              = MatSolve_SeqBAIJ_Update;
227           A->ops->solvetranspose     = MatSolveTranspose_SeqBAIJ_Update;
228           PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n");
229           break;
230       } else {
231         PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
232       }
233 #else
234       PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n");
235 #endif
236     }
237     break;
238   default:
239     SETERRQ(PETSC_ERR_SUP,"unknown option");
240   }
241   PetscFunctionReturn(0);
242 }
243 
244 #undef __FUNCT__
245 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ"
246 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
247 {
248   PetscFunctionBegin;
249   if (m) *m = 0;
250   if (n) *n = A->m;
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatGetRow_SeqBAIJ"
256 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
257 {
258   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
259   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
260   MatScalar    *aa,*aa_i;
261   PetscScalar  *v_i;
262 
263   PetscFunctionBegin;
264   bs  = a->bs;
265   ai  = a->i;
266   aj  = a->j;
267   aa  = a->a;
268   bs2 = a->bs2;
269 
270   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
271 
272   bn  = row/bs;   /* Block number */
273   bp  = row % bs; /* Block Position */
274   M   = ai[bn+1] - ai[bn];
275   *nz = bs*M;
276 
277   if (v) {
278     *v = 0;
279     if (*nz) {
280       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
281       for (i=0; i<M; i++) { /* for each block in the block row */
282         v_i  = *v + i*bs;
283         aa_i = aa + bs2*(ai[bn] + i);
284         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
285       }
286     }
287   }
288 
289   if (idx) {
290     *idx = 0;
291     if (*nz) {
292       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
293       for (i=0; i<M; i++) { /* for each block in the block row */
294         idx_i = *idx + i*bs;
295         itmp  = bs*aj[ai[bn] + i];
296         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
297       }
298     }
299   }
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
305 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
306 {
307   int ierr;
308 
309   PetscFunctionBegin;
310   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
311   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatTranspose_SeqBAIJ"
317 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
318 {
319   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
320   Mat         C;
321   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
322   int         *rows,*cols,bs2=a->bs2;
323   PetscScalar *array;
324 
325   PetscFunctionBegin;
326   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
327   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
328   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
329 
330 #if defined(PETSC_USE_MAT_SINGLE)
331   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
332   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
333 #else
334   array = a->a;
335 #endif
336 
337   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
338   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
339   ierr = PetscFree(col);CHKERRQ(ierr);
340   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
341   cols = rows + bs;
342   for (i=0; i<mbs; i++) {
343     cols[0] = i*bs;
344     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
345     len = ai[i+1] - ai[i];
346     for (j=0; j<len; j++) {
347       rows[0] = (*aj++)*bs;
348       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
349       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
350       array += bs2;
351     }
352   }
353   ierr = PetscFree(rows);CHKERRQ(ierr);
354 #if defined(PETSC_USE_MAT_SINGLE)
355   ierr = PetscFree(array);
356 #endif
357 
358   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
359   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
360 
361   if (B) {
362     *B = C;
363   } else {
364     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
365   }
366   PetscFunctionReturn(0);
367 }
368 
369 #undef __FUNCT__
370 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
371 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
372 {
373   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
374   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
375   PetscScalar *aa;
376   FILE        *file;
377 
378   PetscFunctionBegin;
379   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
380   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
381   col_lens[0] = MAT_COOKIE;
382 
383   col_lens[1] = A->m;
384   col_lens[2] = A->n;
385   col_lens[3] = a->nz*bs2;
386 
387   /* store lengths of each row and write (including header) to file */
388   count = 0;
389   for (i=0; i<a->mbs; i++) {
390     for (j=0; j<bs; j++) {
391       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
392     }
393   }
394   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
395   ierr = PetscFree(col_lens);CHKERRQ(ierr);
396 
397   /* store column indices (zero start index) */
398   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
399   count = 0;
400   for (i=0; i<a->mbs; i++) {
401     for (j=0; j<bs; j++) {
402       for (k=a->i[i]; k<a->i[i+1]; k++) {
403         for (l=0; l<bs; l++) {
404           jj[count++] = bs*a->j[k] + l;
405         }
406       }
407     }
408   }
409   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
410   ierr = PetscFree(jj);CHKERRQ(ierr);
411 
412   /* store nonzero values */
413   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
414   count = 0;
415   for (i=0; i<a->mbs; i++) {
416     for (j=0; j<bs; j++) {
417       for (k=a->i[i]; k<a->i[i+1]; k++) {
418         for (l=0; l<bs; l++) {
419           aa[count++] = a->a[bs2*k + l*bs + j];
420         }
421       }
422     }
423   }
424   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
425   ierr = PetscFree(aa);CHKERRQ(ierr);
426 
427   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
428   if (file) {
429     fprintf(file,"-matload_block_size %d\n",a->bs);
430   }
431   PetscFunctionReturn(0);
432 }
433 
434 #undef __FUNCT__
435 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
436 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
437 {
438   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
439   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
440   PetscViewerFormat format;
441 
442   PetscFunctionBegin;
443   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
444   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) {
445     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
446   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
447     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
448   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
449     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
450     for (i=0; i<a->mbs; i++) {
451       for (j=0; j<bs; j++) {
452         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
453         for (k=a->i[i]; k<a->i[i+1]; k++) {
454           for (l=0; l<bs; l++) {
455 #if defined(PETSC_USE_COMPLEX)
456             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
457               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l,
458                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
459             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
460               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l,
461                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
462             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
463               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
464             }
465 #else
466             if (a->a[bs2*k + l*bs + j] != 0.0) {
467               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
468             }
469 #endif
470           }
471         }
472         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
473       }
474     }
475     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
476   } else {
477     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
478     for (i=0; i<a->mbs; i++) {
479       for (j=0; j<bs; j++) {
480         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
481         for (k=a->i[i]; k<a->i[i+1]; k++) {
482           for (l=0; l<bs; l++) {
483 #if defined(PETSC_USE_COMPLEX)
484             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
485               ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l,
486                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
487             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
488               ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l,
489                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
490             } else {
491               ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
492             }
493 #else
494             ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
495 #endif
496           }
497         }
498         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
499       }
500     }
501     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
502   }
503   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
504   PetscFunctionReturn(0);
505 }
506 
507 #undef __FUNCT__
508 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
509 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
510 {
511   Mat          A = (Mat) Aa;
512   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
513   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
514   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
515   MatScalar    *aa;
516   PetscViewer  viewer;
517 
518   PetscFunctionBegin;
519 
520   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
521   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
522 
523   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
524 
525   /* loop over matrix elements drawing boxes */
526   color = PETSC_DRAW_BLUE;
527   for (i=0,row=0; i<mbs; i++,row+=bs) {
528     for (j=a->i[i]; j<a->i[i+1]; j++) {
529       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
530       x_l = a->j[j]*bs; x_r = x_l + 1.0;
531       aa = a->a + j*bs2;
532       for (k=0; k<bs; k++) {
533         for (l=0; l<bs; l++) {
534           if (PetscRealPart(*aa++) >=  0.) continue;
535           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
536         }
537       }
538     }
539   }
540   color = PETSC_DRAW_CYAN;
541   for (i=0,row=0; i<mbs; i++,row+=bs) {
542     for (j=a->i[i]; j<a->i[i+1]; j++) {
543       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
544       x_l = a->j[j]*bs; x_r = x_l + 1.0;
545       aa = a->a + j*bs2;
546       for (k=0; k<bs; k++) {
547         for (l=0; l<bs; l++) {
548           if (PetscRealPart(*aa++) != 0.) continue;
549           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
550         }
551       }
552     }
553   }
554 
555   color = PETSC_DRAW_RED;
556   for (i=0,row=0; i<mbs; i++,row+=bs) {
557     for (j=a->i[i]; j<a->i[i+1]; j++) {
558       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
559       x_l = a->j[j]*bs; x_r = x_l + 1.0;
560       aa = a->a + j*bs2;
561       for (k=0; k<bs; k++) {
562         for (l=0; l<bs; l++) {
563           if (PetscRealPart(*aa++) <= 0.) continue;
564           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
565         }
566       }
567     }
568   }
569   PetscFunctionReturn(0);
570 }
571 
572 #undef __FUNCT__
573 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
574 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
575 {
576   int          ierr;
577   PetscReal    xl,yl,xr,yr,w,h;
578   PetscDraw    draw;
579   PetscTruth   isnull;
580 
581   PetscFunctionBegin;
582 
583   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
584   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
585 
586   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
587   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
588   xr += w;    yr += h;  xl = -w;     yl = -h;
589   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
590   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
591   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
592   PetscFunctionReturn(0);
593 }
594 
595 #undef __FUNCT__
596 #define __FUNCT__ "MatView_SeqBAIJ"
597 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
598 {
599   int        ierr;
600   PetscTruth issocket,isascii,isbinary,isdraw;
601 
602   PetscFunctionBegin;
603   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
604   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
605   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
606   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
607   if (issocket) {
608     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
609   } else if (isascii){
610     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
611   } else if (isbinary) {
612     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
613   } else if (isdraw) {
614     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
615   } else {
616     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
617   }
618   PetscFunctionReturn(0);
619 }
620 
621 
622 #undef __FUNCT__
623 #define __FUNCT__ "MatGetValues_SeqBAIJ"
624 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
625 {
626   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
627   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
628   int        *ai = a->i,*ailen = a->ilen;
629   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
630   MatScalar  *ap,*aa = a->a,zero = 0.0;
631 
632   PetscFunctionBegin;
633   for (k=0; k<m; k++) { /* loop over rows */
634     row  = im[k]; brow = row/bs;
635     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
636     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
637     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
638     nrow = ailen[brow];
639     for (l=0; l<n; l++) { /* loop over columns */
640       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
641       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
642       col  = in[l] ;
643       bcol = col/bs;
644       cidx = col%bs;
645       ridx = row%bs;
646       high = nrow;
647       low  = 0; /* assume unsorted */
648       while (high-low > 5) {
649         t = (low+high)/2;
650         if (rp[t] > bcol) high = t;
651         else             low  = t;
652       }
653       for (i=low; i<high; i++) {
654         if (rp[i] > bcol) break;
655         if (rp[i] == bcol) {
656           *v++ = ap[bs2*i+bs*cidx+ridx];
657           goto finished;
658         }
659       }
660       *v++ = zero;
661       finished:;
662     }
663   }
664   PetscFunctionReturn(0);
665 }
666 
667 #if defined(PETSC_USE_MAT_SINGLE)
668 #undef __FUNCT__
669 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
670 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
671 {
672   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
673   int         ierr,i,N = m*n*b->bs2;
674   MatScalar   *vsingle;
675 
676   PetscFunctionBegin;
677   if (N > b->setvalueslen) {
678     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
679     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
680     b->setvalueslen  = N;
681   }
682   vsingle = b->setvaluescopy;
683   for (i=0; i<N; i++) {
684     vsingle[i] = v[i];
685   }
686   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 #endif
690 
691 
692 #undef __FUNCT__
693 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
694 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
695 {
696   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
697   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
698   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
699   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
700   PetscTruth  roworiented=a->roworiented;
701   MatScalar   *value = v,*ap,*aa = a->a,*bap;
702 
703   PetscFunctionBegin;
704   if (roworiented) {
705     stepval = (n-1)*bs;
706   } else {
707     stepval = (m-1)*bs;
708   }
709   for (k=0; k<m; k++) { /* loop over added rows */
710     row  = im[k];
711     if (row < 0) continue;
712 #if defined(PETSC_USE_BOPT_g)
713     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
714 #endif
715     rp   = aj + ai[row];
716     ap   = aa + bs2*ai[row];
717     rmax = imax[row];
718     nrow = ailen[row];
719     low  = 0;
720     for (l=0; l<n; l++) { /* loop over added columns */
721       if (in[l] < 0) continue;
722 #if defined(PETSC_USE_BOPT_g)
723       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
724 #endif
725       col = in[l];
726       if (roworiented) {
727         value = v + k*(stepval+bs)*bs + l*bs;
728       } else {
729         value = v + l*(stepval+bs)*bs + k*bs;
730       }
731       if (!sorted) low = 0; high = nrow;
732       while (high-low > 7) {
733         t = (low+high)/2;
734         if (rp[t] > col) high = t;
735         else             low  = t;
736       }
737       for (i=low; i<high; i++) {
738         if (rp[i] > col) break;
739         if (rp[i] == col) {
740           bap  = ap +  bs2*i;
741           if (roworiented) {
742             if (is == ADD_VALUES) {
743               for (ii=0; ii<bs; ii++,value+=stepval) {
744                 for (jj=ii; jj<bs2; jj+=bs) {
745                   bap[jj] += *value++;
746                 }
747               }
748             } else {
749               for (ii=0; ii<bs; ii++,value+=stepval) {
750                 for (jj=ii; jj<bs2; jj+=bs) {
751                   bap[jj] = *value++;
752                 }
753               }
754             }
755           } else {
756             if (is == ADD_VALUES) {
757               for (ii=0; ii<bs; ii++,value+=stepval) {
758                 for (jj=0; jj<bs; jj++) {
759                   *bap++ += *value++;
760                 }
761               }
762             } else {
763               for (ii=0; ii<bs; ii++,value+=stepval) {
764                 for (jj=0; jj<bs; jj++) {
765                   *bap++  = *value++;
766                 }
767               }
768             }
769           }
770           goto noinsert2;
771         }
772       }
773       if (nonew == 1) goto noinsert2;
774       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
775       if (nrow >= rmax) {
776         /* there is no extra room in row, therefore enlarge */
777         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
778         MatScalar *new_a;
779 
780         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
781 
782         /* malloc new storage space */
783         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
784 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
785         new_j   = (int*)(new_a + bs2*new_nz);
786         new_i   = new_j + new_nz;
787 
788         /* copy over old data into new slots */
789         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
790         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
791         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
792         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
793         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
794         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
795         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
796         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
797         /* free up old matrix storage */
798         ierr = PetscFree(a->a);CHKERRQ(ierr);
799         if (!a->singlemalloc) {
800           ierr = PetscFree(a->i);CHKERRQ(ierr);
801           ierr = PetscFree(a->j);CHKERRQ(ierr);
802         }
803         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
804         a->singlemalloc = PETSC_TRUE;
805 
806         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
807         rmax = imax[row] = imax[row] + CHUNKSIZE;
808         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
809         a->maxnz += bs2*CHUNKSIZE;
810         a->reallocs++;
811         a->nz++;
812       }
813       N = nrow++ - 1;
814       /* shift up all the later entries in this row */
815       for (ii=N; ii>=i; ii--) {
816         rp[ii+1] = rp[ii];
817         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
818       }
819       if (N >= i) {
820         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
821       }
822       rp[i] = col;
823       bap   = ap +  bs2*i;
824       if (roworiented) {
825         for (ii=0; ii<bs; ii++,value+=stepval) {
826           for (jj=ii; jj<bs2; jj+=bs) {
827             bap[jj] = *value++;
828           }
829         }
830       } else {
831         for (ii=0; ii<bs; ii++,value+=stepval) {
832           for (jj=0; jj<bs; jj++) {
833             *bap++  = *value++;
834           }
835         }
836       }
837       noinsert2:;
838       low = i;
839     }
840     ailen[row] = nrow;
841   }
842   PetscFunctionReturn(0);
843 }
844 
845 
846 #undef __FUNCT__
847 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
848 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
849 {
850   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
851   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
852   int        m = A->m,*ip,N,*ailen = a->ilen;
853   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
854   MatScalar  *aa = a->a,*ap;
855 
856   PetscFunctionBegin;
857   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
858 
859   if (m) rmax = ailen[0];
860   for (i=1; i<mbs; i++) {
861     /* move each row back by the amount of empty slots (fshift) before it*/
862     fshift += imax[i-1] - ailen[i-1];
863     rmax   = PetscMax(rmax,ailen[i]);
864     if (fshift) {
865       ip = aj + ai[i]; ap = aa + bs2*ai[i];
866       N = ailen[i];
867       for (j=0; j<N; j++) {
868         ip[j-fshift] = ip[j];
869         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
870       }
871     }
872     ai[i] = ai[i-1] + ailen[i-1];
873   }
874   if (mbs) {
875     fshift += imax[mbs-1] - ailen[mbs-1];
876     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
877   }
878   /* reset ilen and imax for each row */
879   for (i=0; i<mbs; i++) {
880     ailen[i] = imax[i] = ai[i+1] - ai[i];
881   }
882   a->nz = ai[mbs];
883 
884   /* diagonals may have moved, so kill the diagonal pointers */
885   if (fshift && a->diag) {
886     ierr = PetscFree(a->diag);CHKERRQ(ierr);
887     PetscLogObjectMemory(A,-(m+1)*sizeof(int));
888     a->diag = 0;
889   }
890   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
891   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
892   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
893   a->reallocs          = 0;
894   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
895 
896   PetscFunctionReturn(0);
897 }
898 
899 
900 
901 /*
902    This function returns an array of flags which indicate the locations of contiguous
903    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
904    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
905    Assume: sizes should be long enough to hold all the values.
906 */
907 #undef __FUNCT__
908 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
909 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
910 {
911   int        i,j,k,row;
912   PetscTruth flg;
913 
914   PetscFunctionBegin;
915   for (i=0,j=0; i<n; j++) {
916     row = idx[i];
917     if (row%bs!=0) { /* Not the begining of a block */
918       sizes[j] = 1;
919       i++;
920     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
921       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
922       i++;
923     } else { /* Begining of the block, so check if the complete block exists */
924       flg = PETSC_TRUE;
925       for (k=1; k<bs; k++) {
926         if (row+k != idx[i+k]) { /* break in the block */
927           flg = PETSC_FALSE;
928           break;
929         }
930       }
931       if (flg == PETSC_TRUE) { /* No break in the bs */
932         sizes[j] = bs;
933         i+= bs;
934       } else {
935         sizes[j] = 1;
936         i++;
937       }
938     }
939   }
940   *bs_max = j;
941   PetscFunctionReturn(0);
942 }
943 
944 #undef __FUNCT__
945 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
946 int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag)
947 {
948   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
949   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
950   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
951   PetscScalar zero = 0.0;
952   MatScalar   *aa;
953 
954   PetscFunctionBegin;
955   /* Make a copy of the IS and  sort it */
956   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
957   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
958 
959   /* allocate memory for rows,sizes */
960   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
961   sizes = rows + is_n;
962 
963   /* copy IS values to rows, and sort them */
964   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
965   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
966   if (baij->keepzeroedrows) {
967     for (i=0; i<is_n; i++) { sizes[i] = 1; }
968     bs_max = is_n;
969   } else {
970     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
971   }
972   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
973 
974   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
975     row   = rows[j];
976     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
977     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
978     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
979     if (sizes[i] == bs && !baij->keepzeroedrows) {
980       if (diag) {
981         if (baij->ilen[row/bs] > 0) {
982           baij->ilen[row/bs]       = 1;
983           baij->j[baij->i[row/bs]] = row/bs;
984           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
985         }
986         /* Now insert all the diagonal values for this bs */
987         for (k=0; k<bs; k++) {
988           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
989         }
990       } else { /* (!diag) */
991         baij->ilen[row/bs] = 0;
992       } /* end (!diag) */
993     } else { /* (sizes[i] != bs) */
994 #if defined (PETSC_USE_DEBUG)
995       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
996 #endif
997       for (k=0; k<count; k++) {
998         aa[0] =  zero;
999         aa    += bs;
1000       }
1001       if (diag) {
1002         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1003       }
1004     }
1005   }
1006 
1007   ierr = PetscFree(rows);CHKERRQ(ierr);
1008   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1014 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
1015 {
1016   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1017   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1018   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1019   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1020   int         ridx,cidx,bs2=a->bs2,ierr;
1021   PetscTruth  roworiented=a->roworiented;
1022   MatScalar   *ap,value,*aa=a->a,*bap;
1023 
1024   PetscFunctionBegin;
1025   for (k=0; k<m; k++) { /* loop over added rows */
1026     row  = im[k]; brow = row/bs;
1027     if (row < 0) continue;
1028 #if defined(PETSC_USE_BOPT_g)
1029     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1030 #endif
1031     rp   = aj + ai[brow];
1032     ap   = aa + bs2*ai[brow];
1033     rmax = imax[brow];
1034     nrow = ailen[brow];
1035     low  = 0;
1036     for (l=0; l<n; l++) { /* loop over added columns */
1037       if (in[l] < 0) continue;
1038 #if defined(PETSC_USE_BOPT_g)
1039       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1040 #endif
1041       col = in[l]; bcol = col/bs;
1042       ridx = row % bs; cidx = col % bs;
1043       if (roworiented) {
1044         value = v[l + k*n];
1045       } else {
1046         value = v[k + l*m];
1047       }
1048       if (!sorted) low = 0; high = nrow;
1049       while (high-low > 7) {
1050         t = (low+high)/2;
1051         if (rp[t] > bcol) high = t;
1052         else              low  = t;
1053       }
1054       for (i=low; i<high; i++) {
1055         if (rp[i] > bcol) break;
1056         if (rp[i] == bcol) {
1057           bap  = ap +  bs2*i + bs*cidx + ridx;
1058           if (is == ADD_VALUES) *bap += value;
1059           else                  *bap  = value;
1060           goto noinsert1;
1061         }
1062       }
1063       if (nonew == 1) goto noinsert1;
1064       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1065       if (nrow >= rmax) {
1066         /* there is no extra room in row, therefore enlarge */
1067         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1068         MatScalar *new_a;
1069 
1070         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1071 
1072         /* Malloc new storage space */
1073         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1074 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1075         new_j   = (int*)(new_a + bs2*new_nz);
1076         new_i   = new_j + new_nz;
1077 
1078         /* copy over old data into new slots */
1079         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1080         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1081         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1082         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1083         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1084         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1085         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1086         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1087         /* free up old matrix storage */
1088         ierr = PetscFree(a->a);CHKERRQ(ierr);
1089         if (!a->singlemalloc) {
1090           ierr = PetscFree(a->i);CHKERRQ(ierr);
1091           ierr = PetscFree(a->j);CHKERRQ(ierr);
1092         }
1093         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1094         a->singlemalloc = PETSC_TRUE;
1095 
1096         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1097         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1098         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1099         a->maxnz += bs2*CHUNKSIZE;
1100         a->reallocs++;
1101         a->nz++;
1102       }
1103       N = nrow++ - 1;
1104       /* shift up all the later entries in this row */
1105       for (ii=N; ii>=i; ii--) {
1106         rp[ii+1] = rp[ii];
1107         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1108       }
1109       if (N>=i) {
1110         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1111       }
1112       rp[i]                      = bcol;
1113       ap[bs2*i + bs*cidx + ridx] = value;
1114       noinsert1:;
1115       low = i;
1116     }
1117     ailen[brow] = nrow;
1118   }
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1125 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1126 {
1127   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1128   Mat         outA;
1129   int         ierr;
1130   PetscTruth  row_identity,col_identity;
1131 
1132   PetscFunctionBegin;
1133   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1134   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1135   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1136   if (!row_identity || !col_identity) {
1137     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1138   }
1139 
1140   outA          = inA;
1141   inA->factor   = FACTOR_LU;
1142 
1143   if (!a->diag) {
1144     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1145   }
1146 
1147   a->row        = row;
1148   a->col        = col;
1149   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1150   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1151 
1152   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1153   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1154   PetscLogObjectParent(inA,a->icol);
1155 
1156   /*
1157       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1158       for ILU(0) factorization with natural ordering
1159   */
1160   if (a->bs < 8) {
1161     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1162   } else {
1163     if (!a->solve_work) {
1164       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1165       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1166     }
1167   }
1168 
1169   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1170 
1171   PetscFunctionReturn(0);
1172 }
1173 #undef __FUNCT__
1174 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1175 int MatPrintHelp_SeqBAIJ(Mat A)
1176 {
1177   static PetscTruth called = PETSC_FALSE;
1178   MPI_Comm          comm = A->comm;
1179   int               ierr;
1180 
1181   PetscFunctionBegin;
1182   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1183   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1184   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 EXTERN_C_BEGIN
1189 #undef __FUNCT__
1190 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1191 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1192 {
1193   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1194   int         i,nz,n;
1195 
1196   PetscFunctionBegin;
1197   nz = baij->maxnz;
1198   n  = mat->n;
1199   for (i=0; i<nz; i++) {
1200     baij->j[i] = indices[i];
1201   }
1202   baij->nz = nz;
1203   for (i=0; i<n; i++) {
1204     baij->ilen[i] = baij->imax[i];
1205   }
1206 
1207   PetscFunctionReturn(0);
1208 }
1209 EXTERN_C_END
1210 
1211 #undef __FUNCT__
1212 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1213 /*@
1214     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1215        in the matrix.
1216 
1217   Input Parameters:
1218 +  mat - the SeqBAIJ matrix
1219 -  indices - the column indices
1220 
1221   Level: advanced
1222 
1223   Notes:
1224     This can be called if you have precomputed the nonzero structure of the
1225   matrix and want to provide it to the matrix object to improve the performance
1226   of the MatSetValues() operation.
1227 
1228     You MUST have set the correct numbers of nonzeros per row in the call to
1229   MatCreateSeqBAIJ().
1230 
1231     MUST be called before any calls to MatSetValues();
1232 
1233 @*/
1234 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1235 {
1236   int ierr,(*f)(Mat,int *);
1237 
1238   PetscFunctionBegin;
1239   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1240   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr);
1241   if (f) {
1242     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1243   } else {
1244     SETERRQ(1,"Wrong type of matrix to set column indices");
1245   }
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1251 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1252 {
1253   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1254   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1255   PetscReal    atmp;
1256   PetscScalar  *x,zero = 0.0;
1257   MatScalar    *aa;
1258   int          ncols,brow,krow,kcol;
1259 
1260   PetscFunctionBegin;
1261   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1262   bs   = a->bs;
1263   aa   = a->a;
1264   ai   = a->i;
1265   aj   = a->j;
1266   mbs = a->mbs;
1267 
1268   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1269   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1270   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1271   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1272   for (i=0; i<mbs; i++) {
1273     ncols = ai[1] - ai[0]; ai++;
1274     brow  = bs*i;
1275     for (j=0; j<ncols; j++){
1276       /* bcol = bs*(*aj); */
1277       for (kcol=0; kcol<bs; kcol++){
1278         for (krow=0; krow<bs; krow++){
1279           atmp = PetscAbsScalar(*aa); aa++;
1280           row = brow + krow;    /* row index */
1281           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1282           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1283         }
1284       }
1285       aj++;
1286     }
1287   }
1288   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1289   PetscFunctionReturn(0);
1290 }
1291 
1292 #undef __FUNCT__
1293 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1294 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1295 {
1296   int        ierr;
1297 
1298   PetscFunctionBegin;
1299   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1305 int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array)
1306 {
1307   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1308   PetscFunctionBegin;
1309   *array = a->a;
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1315 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array)
1316 {
1317   PetscFunctionBegin;
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 /* -------------------------------------------------------------------*/
1322 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1323        MatGetRow_SeqBAIJ,
1324        MatRestoreRow_SeqBAIJ,
1325        MatMult_SeqBAIJ_N,
1326        MatMultAdd_SeqBAIJ_N,
1327        MatMultTranspose_SeqBAIJ,
1328        MatMultTransposeAdd_SeqBAIJ,
1329        MatSolve_SeqBAIJ_N,
1330        0,
1331        0,
1332        0,
1333        MatLUFactor_SeqBAIJ,
1334        0,
1335        0,
1336        MatTranspose_SeqBAIJ,
1337        MatGetInfo_SeqBAIJ,
1338        MatEqual_SeqBAIJ,
1339        MatGetDiagonal_SeqBAIJ,
1340        MatDiagonalScale_SeqBAIJ,
1341        MatNorm_SeqBAIJ,
1342        0,
1343        MatAssemblyEnd_SeqBAIJ,
1344        0,
1345        MatSetOption_SeqBAIJ,
1346        MatZeroEntries_SeqBAIJ,
1347        MatZeroRows_SeqBAIJ,
1348        MatLUFactorSymbolic_SeqBAIJ,
1349        MatLUFactorNumeric_SeqBAIJ_N,
1350        0,
1351        0,
1352        MatSetUpPreallocation_SeqBAIJ,
1353        0,
1354        MatGetOwnershipRange_SeqBAIJ,
1355        MatILUFactorSymbolic_SeqBAIJ,
1356        0,
1357        MatGetArray_SeqBAIJ,
1358        MatRestoreArray_SeqBAIJ,
1359        MatDuplicate_SeqBAIJ,
1360        0,
1361        0,
1362        MatILUFactor_SeqBAIJ,
1363        0,
1364        0,
1365        MatGetSubMatrices_SeqBAIJ,
1366        MatIncreaseOverlap_SeqBAIJ,
1367        MatGetValues_SeqBAIJ,
1368        0,
1369        MatPrintHelp_SeqBAIJ,
1370        MatScale_SeqBAIJ,
1371        0,
1372        0,
1373        0,
1374        MatGetBlockSize_SeqBAIJ,
1375        MatGetRowIJ_SeqBAIJ,
1376        MatRestoreRowIJ_SeqBAIJ,
1377        0,
1378        0,
1379        0,
1380        0,
1381        0,
1382        0,
1383        MatSetValuesBlocked_SeqBAIJ,
1384        MatGetSubMatrix_SeqBAIJ,
1385        MatDestroy_SeqBAIJ,
1386        MatView_SeqBAIJ,
1387        MatGetPetscMaps_Petsc,
1388        0,
1389        0,
1390        0,
1391        0,
1392        0,
1393        0,
1394        MatGetRowMax_SeqBAIJ,
1395        MatConvert_Basic};
1396 
1397 EXTERN_C_BEGIN
1398 #undef __FUNCT__
1399 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
1400 int MatStoreValues_SeqBAIJ(Mat mat)
1401 {
1402   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1403   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1404   int         ierr;
1405 
1406   PetscFunctionBegin;
1407   if (aij->nonew != 1) {
1408     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1409   }
1410 
1411   /* allocate space for values if not already there */
1412   if (!aij->saved_values) {
1413     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
1414   }
1415 
1416   /* copy values over */
1417   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 EXTERN_C_END
1421 
1422 EXTERN_C_BEGIN
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
1425 int MatRetrieveValues_SeqBAIJ(Mat mat)
1426 {
1427   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1428   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1429 
1430   PetscFunctionBegin;
1431   if (aij->nonew != 1) {
1432     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1433   }
1434   if (!aij->saved_values) {
1435     SETERRQ(1,"Must call MatStoreValues(A);first");
1436   }
1437 
1438   /* copy values over */
1439   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
1440   PetscFunctionReturn(0);
1441 }
1442 EXTERN_C_END
1443 
1444 EXTERN_C_BEGIN
1445 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1446 EXTERN_C_END
1447 
1448 EXTERN_C_BEGIN
1449 #undef __FUNCT__
1450 #define __FUNCT__ "MatCreate_SeqBAIJ"
1451 int MatCreate_SeqBAIJ(Mat B)
1452 {
1453   int         ierr,size;
1454   Mat_SeqBAIJ *b;
1455 
1456   PetscFunctionBegin;
1457   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1458   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1459 
1460   B->m = B->M = PetscMax(B->m,B->M);
1461   B->n = B->N = PetscMax(B->n,B->N);
1462   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
1463   B->data = (void*)b;
1464   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1465   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1466   B->factor           = 0;
1467   B->lupivotthreshold = 1.0;
1468   B->mapping          = 0;
1469   b->row              = 0;
1470   b->col              = 0;
1471   b->icol             = 0;
1472   b->reallocs         = 0;
1473   b->saved_values     = 0;
1474 #if defined(PETSC_USE_MAT_SINGLE)
1475   b->setvalueslen     = 0;
1476   b->setvaluescopy    = PETSC_NULL;
1477 #endif
1478   b->single_precision_solves = PETSC_FALSE;
1479 
1480   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
1481   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
1482 
1483   b->sorted           = PETSC_FALSE;
1484   b->roworiented      = PETSC_TRUE;
1485   b->nonew            = 0;
1486   b->diag             = 0;
1487   b->solve_work       = 0;
1488   b->mult_work        = 0;
1489   b->spptr            = 0;
1490   B->info.nz_unneeded = (PetscReal)b->maxnz;
1491   b->keepzeroedrows   = PETSC_FALSE;
1492 
1493   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1494                                      "MatStoreValues_SeqBAIJ",
1495                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1496   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1497                                      "MatRetrieveValues_SeqBAIJ",
1498                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1499   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1500                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1501                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1502   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C",
1503                                      "MatConvert_SeqBAIJ_SeqAIJ",
1504                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
1505   PetscFunctionReturn(0);
1506 }
1507 EXTERN_C_END
1508 
1509 #undef __FUNCT__
1510 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
1511 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1512 {
1513   Mat         C;
1514   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1515   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1516 
1517   PetscFunctionBegin;
1518   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1519 
1520   *B = 0;
1521   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
1522   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
1523   c    = (Mat_SeqBAIJ*)C->data;
1524 
1525   c->bs         = a->bs;
1526   c->bs2        = a->bs2;
1527   c->mbs        = a->mbs;
1528   c->nbs        = a->nbs;
1529   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1530 
1531   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
1532   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
1533   for (i=0; i<mbs; i++) {
1534     c->imax[i] = a->imax[i];
1535     c->ilen[i] = a->ilen[i];
1536   }
1537 
1538   /* allocate the matrix space */
1539   c->singlemalloc = PETSC_TRUE;
1540   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1541   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
1542   c->j = (int*)(c->a + nz*bs2);
1543   c->i = c->j + nz;
1544   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1545   if (mbs > 0) {
1546     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1547     if (cpvalues == MAT_COPY_VALUES) {
1548       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1549     } else {
1550       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1551     }
1552   }
1553 
1554   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1555   c->sorted      = a->sorted;
1556   c->roworiented = a->roworiented;
1557   c->nonew       = a->nonew;
1558 
1559   if (a->diag) {
1560     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
1561     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1562     for (i=0; i<mbs; i++) {
1563       c->diag[i] = a->diag[i];
1564     }
1565   } else c->diag        = 0;
1566   c->nz                 = a->nz;
1567   c->maxnz              = a->maxnz;
1568   c->solve_work         = 0;
1569   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1570   c->mult_work          = 0;
1571   C->preallocated       = PETSC_TRUE;
1572   C->assembled          = PETSC_TRUE;
1573   *B = C;
1574   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 EXTERN_C_BEGIN
1579 #undef __FUNCT__
1580 #define __FUNCT__ "MatLoad_SeqBAIJ"
1581 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1582 {
1583   Mat_SeqBAIJ  *a;
1584   Mat          B;
1585   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1586   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1587   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1588   int          *masked,nmask,tmp,bs2,ishift;
1589   PetscScalar  *aa;
1590   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1591 
1592   PetscFunctionBegin;
1593   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1594   bs2  = bs*bs;
1595 
1596   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1597   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1598   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1599   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1600   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1601   M = header[1]; N = header[2]; nz = header[3];
1602 
1603   if (header[3] < 0) {
1604     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1605   }
1606 
1607   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1608 
1609   /*
1610      This code adds extra rows to make sure the number of rows is
1611     divisible by the blocksize
1612   */
1613   mbs        = M/bs;
1614   extra_rows = bs - M + bs*(mbs);
1615   if (extra_rows == bs) extra_rows = 0;
1616   else                  mbs++;
1617   if (extra_rows) {
1618     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1619   }
1620 
1621   /* read in row lengths */
1622   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
1623   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1624   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1625 
1626   /* read in column indices */
1627   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
1628   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1629   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1630 
1631   /* loop over row lengths determining block row lengths */
1632   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
1633   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1634   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
1635   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1636   masked   = mask + mbs;
1637   rowcount = 0; nzcount = 0;
1638   for (i=0; i<mbs; i++) {
1639     nmask = 0;
1640     for (j=0; j<bs; j++) {
1641       kmax = rowlengths[rowcount];
1642       for (k=0; k<kmax; k++) {
1643         tmp = jj[nzcount++]/bs;
1644         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1645       }
1646       rowcount++;
1647     }
1648     browlengths[i] += nmask;
1649     /* zero out the mask elements we set */
1650     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1651   }
1652 
1653   /* create our matrix */
1654   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1655   B = *A;
1656   a = (Mat_SeqBAIJ*)B->data;
1657 
1658   /* set matrix "i" values */
1659   a->i[0] = 0;
1660   for (i=1; i<= mbs; i++) {
1661     a->i[i]      = a->i[i-1] + browlengths[i-1];
1662     a->ilen[i-1] = browlengths[i-1];
1663   }
1664   a->nz         = 0;
1665   for (i=0; i<mbs; i++) a->nz += browlengths[i];
1666 
1667   /* read in nonzero values */
1668   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1669   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1670   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1671 
1672   /* set "a" and "j" values into matrix */
1673   nzcount = 0; jcount = 0;
1674   for (i=0; i<mbs; i++) {
1675     nzcountb = nzcount;
1676     nmask    = 0;
1677     for (j=0; j<bs; j++) {
1678       kmax = rowlengths[i*bs+j];
1679       for (k=0; k<kmax; k++) {
1680         tmp = jj[nzcount++]/bs;
1681 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1682       }
1683     }
1684     /* sort the masked values */
1685     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1686 
1687     /* set "j" values into matrix */
1688     maskcount = 1;
1689     for (j=0; j<nmask; j++) {
1690       a->j[jcount++]  = masked[j];
1691       mask[masked[j]] = maskcount++;
1692     }
1693     /* set "a" values into matrix */
1694     ishift = bs2*a->i[i];
1695     for (j=0; j<bs; j++) {
1696       kmax = rowlengths[i*bs+j];
1697       for (k=0; k<kmax; k++) {
1698         tmp       = jj[nzcountb]/bs ;
1699         block     = mask[tmp] - 1;
1700         point     = jj[nzcountb] - bs*tmp;
1701         idx       = ishift + bs2*block + j + bs*point;
1702         a->a[idx] = (MatScalar)aa[nzcountb++];
1703       }
1704     }
1705     /* zero out the mask elements we set */
1706     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1707   }
1708   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1709 
1710   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1711   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1712   ierr = PetscFree(aa);CHKERRQ(ierr);
1713   ierr = PetscFree(jj);CHKERRQ(ierr);
1714   ierr = PetscFree(mask);CHKERRQ(ierr);
1715 
1716   B->assembled = PETSC_TRUE;
1717 
1718   ierr = MatView_Private(B);CHKERRQ(ierr);
1719   PetscFunctionReturn(0);
1720 }
1721 EXTERN_C_END
1722 
1723 #undef __FUNCT__
1724 #define __FUNCT__ "MatCreateSeqBAIJ"
1725 /*@C
1726    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1727    compressed row) format.  For good matrix assembly performance the
1728    user should preallocate the matrix storage by setting the parameter nz
1729    (or the array nnz).  By setting these parameters accurately, performance
1730    during matrix assembly can be increased by more than a factor of 50.
1731 
1732    Collective on MPI_Comm
1733 
1734    Input Parameters:
1735 +  comm - MPI communicator, set to PETSC_COMM_SELF
1736 .  bs - size of block
1737 .  m - number of rows
1738 .  n - number of columns
1739 .  nz - number of nonzero blocks  per block row (same for all rows)
1740 -  nnz - array containing the number of nonzero blocks in the various block rows
1741          (possibly different for each block row) or PETSC_NULL
1742 
1743    Output Parameter:
1744 .  A - the matrix
1745 
1746    Options Database Keys:
1747 .   -mat_no_unroll - uses code that does not unroll the loops in the
1748                      block calculations (much slower)
1749 .    -mat_block_size - size of the blocks to use
1750 
1751    Level: intermediate
1752 
1753    Notes:
1754    A nonzero block is any block that as 1 or more nonzeros in it
1755 
1756    The block AIJ format is fully compatible with standard Fortran 77
1757    storage.  That is, the stored row and column indices can begin at
1758    either one (as in Fortran) or zero.  See the users' manual for details.
1759 
1760    Specify the preallocated storage with either nz or nnz (not both).
1761    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1762    allocation.  For additional details, see the users manual chapter on
1763    matrices.
1764 
1765 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1766 @*/
1767 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1768 {
1769   int ierr;
1770 
1771   PetscFunctionBegin;
1772   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
1773   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
1774   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 #undef __FUNCT__
1779 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
1780 /*@C
1781    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1782    per row in the matrix. For good matrix assembly performance the
1783    user should preallocate the matrix storage by setting the parameter nz
1784    (or the array nnz).  By setting these parameters accurately, performance
1785    during matrix assembly can be increased by more than a factor of 50.
1786 
1787    Collective on MPI_Comm
1788 
1789    Input Parameters:
1790 +  A - the matrix
1791 .  bs - size of block
1792 .  nz - number of block nonzeros per block row (same for all rows)
1793 -  nnz - array containing the number of block nonzeros in the various block rows
1794          (possibly different for each block row) or PETSC_NULL
1795 
1796    Options Database Keys:
1797 .   -mat_no_unroll - uses code that does not unroll the loops in the
1798                      block calculations (much slower)
1799 .    -mat_block_size - size of the blocks to use
1800 
1801    Level: intermediate
1802 
1803    Notes:
1804    The block AIJ format is fully compatible with standard Fortran 77
1805    storage.  That is, the stored row and column indices can begin at
1806    either one (as in Fortran) or zero.  See the users' manual for details.
1807 
1808    Specify the preallocated storage with either nz or nnz (not both).
1809    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1810    allocation.  For additional details, see the users manual chapter on
1811    matrices.
1812 
1813 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1814 @*/
1815 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1816 {
1817   Mat_SeqBAIJ *b;
1818   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1819   PetscTruth  flg;
1820 
1821   PetscFunctionBegin;
1822   ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr);
1823   if (!flg) PetscFunctionReturn(0);
1824 
1825   B->preallocated = PETSC_TRUE;
1826   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
1827   if (nnz && newbs != bs) {
1828     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1829   }
1830   bs   = newbs;
1831 
1832   mbs  = B->m/bs;
1833   nbs  = B->n/bs;
1834   bs2  = bs*bs;
1835 
1836   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1837     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1838   }
1839 
1840   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1841   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1842   if (nnz) {
1843     for (i=0; i<mbs; i++) {
1844       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1845       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1846     }
1847   }
1848 
1849   b       = (Mat_SeqBAIJ*)B->data;
1850   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1851   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1852   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1853   if (!flg) {
1854     switch (bs) {
1855     case 1:
1856       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1857       B->ops->mult            = MatMult_SeqBAIJ_1;
1858       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1859       break;
1860     case 2:
1861       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1862       B->ops->mult            = MatMult_SeqBAIJ_2;
1863       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1864       break;
1865     case 3:
1866       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1867       B->ops->mult            = MatMult_SeqBAIJ_3;
1868       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1869       break;
1870     case 4:
1871       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1872       B->ops->mult            = MatMult_SeqBAIJ_4;
1873       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1874       break;
1875     case 5:
1876       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1877       B->ops->mult            = MatMult_SeqBAIJ_5;
1878       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1879       break;
1880     case 6:
1881       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1882       B->ops->mult            = MatMult_SeqBAIJ_6;
1883       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1884       break;
1885     case 7:
1886       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
1887       B->ops->mult            = MatMult_SeqBAIJ_7;
1888       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1889       break;
1890     default:
1891       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
1892       B->ops->mult            = MatMult_SeqBAIJ_N;
1893       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
1894       break;
1895     }
1896   }
1897   b->bs      = bs;
1898   b->mbs     = mbs;
1899   b->nbs     = nbs;
1900   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
1901   if (!nnz) {
1902     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1903     else if (nz <= 0)        nz = 1;
1904     for (i=0; i<mbs; i++) b->imax[i] = nz;
1905     nz = nz*mbs;
1906   } else {
1907     nz = 0;
1908     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
1909   }
1910 
1911   /* allocate the matrix space */
1912   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
1913   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
1914   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1915   b->j  = (int*)(b->a + nz*bs2);
1916   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1917   b->i  = b->j + nz;
1918   b->singlemalloc = PETSC_TRUE;
1919 
1920   b->i[0] = 0;
1921   for (i=1; i<mbs+1; i++) {
1922     b->i[i] = b->i[i-1] + b->imax[i-1];
1923   }
1924 
1925   /* b->ilen will count nonzeros in each block row so far. */
1926   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
1927   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1928   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
1929 
1930   b->bs               = bs;
1931   b->bs2              = bs2;
1932   b->mbs              = mbs;
1933   b->nz               = 0;
1934   b->maxnz            = nz*bs2;
1935   B->info.nz_unneeded = (PetscReal)b->maxnz;
1936   PetscFunctionReturn(0);
1937 }
1938 
1939