xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 4787f7683dc5ea85e35b4269f7afceca0dae908f)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: baij.c,v 1.162 1999/02/15 18:32:41 balay Exp balay $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "sys.h"
10 #include "src/mat/impls/baij/seq/baij.h"
11 #include "src/vec/vecimpl.h"
12 #include "src/inline/spops.h"
13 
14 #define CHUNKSIZE  10
15 
16 /*
17      Checks for missing diagonals
18 */
19 #undef __FUNC__
20 #define __FUNC__ "MatMissingDiag_SeqBAIJ"
21 int MatMissingDiag_SeqBAIJ(Mat A)
22 {
23   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
24   int         *diag = a->diag, *jj = a->j,i;
25 
26   PetscFunctionBegin;
27   for ( i=0; i<a->mbs; i++ ) {
28     if (jj[diag[i]] != i) {
29       SETERRQ1(1,1,"Matrix is missing diagonal number %d",i);
30     }
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 #undef __FUNC__
36 #define __FUNC__ "MatMarkDiag_SeqBAIJ"
37 int MatMarkDiag_SeqBAIJ(Mat A)
38 {
39   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
40   int         i,j, *diag, m = a->mbs;
41 
42   PetscFunctionBegin;
43   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
44   PLogObjectMemory(A,(m+1)*sizeof(int));
45   for ( i=0; i<m; i++ ) {
46     diag[i] = a->i[i+1];
47     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
48       if (a->j[j] == i) {
49         diag[i] = j;
50         break;
51       }
52     }
53   }
54   a->diag = diag;
55   PetscFunctionReturn(0);
56 }
57 
58 
59 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
60 
61 #undef __FUNC__
62 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
63 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
64                             PetscTruth *done)
65 {
66   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
67   int         ierr, n = a->mbs,i;
68 
69   PetscFunctionBegin;
70   *nn = n;
71   if (!ia) PetscFunctionReturn(0);
72   if (symmetric) {
73     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
74   } else if (oshift == 1) {
75     /* temporarily add 1 to i and j indices */
76     int nz = a->i[n] + 1;
77     for ( i=0; i<nz; i++ ) a->j[i]++;
78     for ( i=0; i<n+1; i++ ) a->i[i]++;
79     *ia = a->i; *ja = a->j;
80   } else {
81     *ia = a->i; *ja = a->j;
82   }
83 
84   PetscFunctionReturn(0);
85 }
86 
87 #undef __FUNC__
88 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
89 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
90                                 PetscTruth *done)
91 {
92   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
93   int         i,n = a->mbs;
94 
95   PetscFunctionBegin;
96   if (!ia) PetscFunctionReturn(0);
97   if (symmetric) {
98     PetscFree(*ia);
99     PetscFree(*ja);
100   } else if (oshift == 1) {
101     int nz = a->i[n];
102     for ( i=0; i<nz; i++ ) a->j[i]--;
103     for ( i=0; i<n+1; i++ ) a->i[i]--;
104   }
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNC__
109 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
110 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
111 {
112   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
113 
114   PetscFunctionBegin;
115   *bs = baij->bs;
116   PetscFunctionReturn(0);
117 }
118 
119 
120 #undef __FUNC__
121 #define __FUNC__ "MatDestroy_SeqBAIJ"
122 int MatDestroy_SeqBAIJ(Mat A)
123 {
124   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
125   int         ierr;
126 
127   if (--A->refct > 0) PetscFunctionReturn(0);
128 
129   if (A->mapping) {
130     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
131   }
132   if (A->bmapping) {
133     ierr = ISLocalToGlobalMappingDestroy(A->bmapping); CHKERRQ(ierr);
134   }
135   if (A->rmap) {
136     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
137   }
138   if (A->cmap) {
139     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
140   }
141 #if defined(USE_PETSC_LOG)
142   PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
143 #endif
144   PetscFree(a->a);
145   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
146   if (a->diag) PetscFree(a->diag);
147   if (a->ilen) PetscFree(a->ilen);
148   if (a->imax) PetscFree(a->imax);
149   if (a->solve_work) PetscFree(a->solve_work);
150   if (a->mult_work) PetscFree(a->mult_work);
151   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
152   PetscFree(a);
153   PLogObjectDestroy(A);
154   PetscHeaderDestroy(A);
155   PetscFunctionReturn(0);
156 }
157 
158 #undef __FUNC__
159 #define __FUNC__ "MatSetOption_SeqBAIJ"
160 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
161 {
162   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
163 
164   PetscFunctionBegin;
165   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
166   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
167   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
168   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
169   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
170   else if (op == MAT_NEW_NONZERO_LOCATION_ERR)     a->nonew       = -1;
171   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR)   a->nonew       = -2;
172   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
173   else if (op == MAT_ROWS_SORTED ||
174            op == MAT_ROWS_UNSORTED ||
175            op == MAT_SYMMETRIC ||
176            op == MAT_STRUCTURALLY_SYMMETRIC ||
177            op == MAT_YES_NEW_DIAGONALS ||
178            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
179            op == MAT_USE_HASH_TABLE) {
180     PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
181   } else if (op == MAT_NO_NEW_DIAGONALS) {
182     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
183   } else {
184     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
185   }
186   PetscFunctionReturn(0);
187 }
188 
189 
190 #undef __FUNC__
191 #define __FUNC__ "MatGetSize_SeqBAIJ"
192 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
193 {
194   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
195 
196   PetscFunctionBegin;
197   if (m) *m = a->m;
198   if (n) *n = a->n;
199   PetscFunctionReturn(0);
200 }
201 
202 #undef __FUNC__
203 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
204 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
205 {
206   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
207 
208   PetscFunctionBegin;
209   *m = 0; *n = a->m;
210   PetscFunctionReturn(0);
211 }
212 
213 #undef __FUNC__
214 #define __FUNC__ "MatGetRow_SeqBAIJ"
215 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
216 {
217   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ *) A->data;
218   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
219   MatScalar    *aa,*aa_i;
220   Scalar       *v_i;
221 
222   PetscFunctionBegin;
223   bs  = a->bs;
224   ai  = a->i;
225   aj  = a->j;
226   aa  = a->a;
227   bs2 = a->bs2;
228 
229   if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range");
230 
231   bn  = row/bs;   /* Block number */
232   bp  = row % bs; /* Block Position */
233   M   = ai[bn+1] - ai[bn];
234   *nz = bs*M;
235 
236   if (v) {
237     *v = 0;
238     if (*nz) {
239       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
240       for ( i=0; i<M; i++ ) { /* for each block in the block row */
241         v_i  = *v + i*bs;
242         aa_i = aa + bs2*(ai[bn] + i);
243         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
244       }
245     }
246   }
247 
248   if (idx) {
249     *idx = 0;
250     if (*nz) {
251       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
252       for ( i=0; i<M; i++ ) { /* for each block in the block row */
253         idx_i = *idx + i*bs;
254         itmp  = bs*aj[ai[bn] + i];
255         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
256       }
257     }
258   }
259   PetscFunctionReturn(0);
260 }
261 
262 #undef __FUNC__
263 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
264 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
265 {
266   PetscFunctionBegin;
267   if (idx) {if (*idx) PetscFree(*idx);}
268   if (v)   {if (*v)   PetscFree(*v);}
269   PetscFunctionReturn(0);
270 }
271 
272 #undef __FUNC__
273 #define __FUNC__ "MatTranspose_SeqBAIJ"
274 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
275 {
276   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
277   Mat         C;
278   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
279   int         *rows,*cols,bs2=a->bs2;
280   MatScalar   *array = a->a;
281 
282   PetscFunctionBegin;
283   if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place");
284   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
285   PetscMemzero(col,(1+nbs)*sizeof(int));
286 
287   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
288   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
289   PetscFree(col);
290   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
291   cols = rows + bs;
292   for ( i=0; i<mbs; i++ ) {
293     cols[0] = i*bs;
294     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
295     len = ai[i+1] - ai[i];
296     for ( j=0; j<len; j++ ) {
297       rows[0] = (*aj++)*bs;
298       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
299       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
300       array += bs2;
301     }
302   }
303   PetscFree(rows);
304 
305   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
306   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
307 
308   if (B != PETSC_NULL) {
309     *B = C;
310   } else {
311     PetscOps *Abops;
312     MatOps   Aops;
313 
314     /* This isn't really an in-place transpose */
315     PetscFree(a->a);
316     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
317     if (a->diag) PetscFree(a->diag);
318     if (a->ilen) PetscFree(a->ilen);
319     if (a->imax) PetscFree(a->imax);
320     if (a->solve_work) PetscFree(a->solve_work);
321     PetscFree(a);
322 
323 
324     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
325     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
326 
327     /*
328        This is horrible, horrible code. We need to keep the
329       A pointers for the bops and ops but copy everything
330       else from C.
331     */
332     Abops    = A->bops;
333     Aops     = A->ops;
334     PetscMemcpy(A,C,sizeof(struct _p_Mat));
335     A->bops  = Abops;
336     A->ops   = Aops;
337     A->qlist = 0;
338 
339     PetscHeaderDestroy(C);
340   }
341   PetscFunctionReturn(0);
342 }
343 
344 
345 
346 
347 #undef __FUNC__
348 #define __FUNC__ "MatView_SeqBAIJ_Binary"
349 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
350 {
351   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
352   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
353   Scalar      *aa;
354 
355   PetscFunctionBegin;
356   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
357   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
358   col_lens[0] = MAT_COOKIE;
359 
360   col_lens[1] = a->m;
361   col_lens[2] = a->n;
362   col_lens[3] = a->nz*bs2;
363 
364   /* store lengths of each row and write (including header) to file */
365   count = 0;
366   for ( i=0; i<a->mbs; i++ ) {
367     for ( j=0; j<bs; j++ ) {
368       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
369     }
370   }
371   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1); CHKERRQ(ierr);
372   PetscFree(col_lens);
373 
374   /* store column indices (zero start index) */
375   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj);
376   count = 0;
377   for ( i=0; i<a->mbs; i++ ) {
378     for ( j=0; j<bs; j++ ) {
379       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
380         for ( l=0; l<bs; l++ ) {
381           jj[count++] = bs*a->j[k] + l;
382         }
383       }
384     }
385   }
386   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0); CHKERRQ(ierr);
387   PetscFree(jj);
388 
389   /* store nonzero values */
390   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa);
391   count = 0;
392   for ( i=0; i<a->mbs; i++ ) {
393     for ( j=0; j<bs; j++ ) {
394       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
395         for ( l=0; l<bs; l++ ) {
396           aa[count++] = a->a[bs2*k + l*bs + j];
397         }
398       }
399     }
400   }
401   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0); CHKERRQ(ierr);
402   PetscFree(aa);
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNC__
407 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
408 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
409 {
410   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
411   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
412   FILE        *fd;
413   char        *outputname;
414 
415   PetscFunctionBegin;
416   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
417   ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr);
418   ierr = ViewerGetFormat(viewer,&format);
419   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
420     ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);
421   } else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
422     SETERRQ(PETSC_ERR_SUP,0,"Socket format not supported");
423   } else if (format == VIEWER_FORMAT_ASCII_COMMON) {
424     for ( i=0; i<a->mbs; i++ ) {
425       for ( j=0; j<bs; j++ ) {
426         fprintf(fd,"row %d:",i*bs+j);
427         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
428           for ( l=0; l<bs; l++ ) {
429 #if defined(USE_PETSC_COMPLEX)
430             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
431               fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
432                       PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));
433             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
434               fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
435                       PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));
436             } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
437               fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));
438             }
439 #else
440             if (a->a[bs2*k + l*bs + j] != 0.0) {
441               fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
442             }
443 #endif
444           }
445         }
446         fprintf(fd,"\n");
447       }
448     }
449   } else {
450     for ( i=0; i<a->mbs; i++ ) {
451       for ( j=0; j<bs; j++ ) {
452         fprintf(fd,"row %d:",i*bs+j);
453         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
454           for ( l=0; l<bs; l++ ) {
455 #if defined(USE_PETSC_COMPLEX)
456             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) {
457               fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
458                 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));
459             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) {
460               fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
461                 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));
462             } else {
463               fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));
464             }
465 #else
466             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
467 #endif
468           }
469         }
470         fprintf(fd,"\n");
471       }
472     }
473   }
474   fflush(fd);
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNC__
479 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom"
480 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa)
481 {
482   Mat          A = (Mat) Aa;
483   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
484   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
485   double       xl,yl,xr,yr,x_l,x_r,y_l,y_r;
486   MatScalar    *aa;
487   MPI_Comm     comm;
488   Viewer       viewer;
489 
490   PetscFunctionBegin;
491   /*
492       This is nasty. If this is called from an originally parallel matrix
493    then all processes call this, but only the first has the matrix so the
494    rest should return immediately.
495   */
496   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
497   MPI_Comm_rank(comm,&rank);
498   if (rank) PetscFunctionReturn(0);
499 
500   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr);
501 
502   ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr); CHKERRQ(ierr);
503 
504   /* loop over matrix elements drawing boxes */
505   color = DRAW_BLUE;
506   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
507     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
508       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
509       x_l = a->j[j]*bs; x_r = x_l + 1.0;
510       aa = a->a + j*bs2;
511       for ( k=0; k<bs; k++ ) {
512         for ( l=0; l<bs; l++ ) {
513           if (PetscReal(*aa++) >=  0.) continue;
514           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
515         }
516       }
517     }
518   }
519   color = DRAW_CYAN;
520   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
521     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
522       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
523       x_l = a->j[j]*bs; x_r = x_l + 1.0;
524       aa = a->a + j*bs2;
525       for ( k=0; k<bs; k++ ) {
526         for ( l=0; l<bs; l++ ) {
527           if (PetscReal(*aa++) != 0.) continue;
528           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
529         }
530       }
531     }
532   }
533 
534   color = DRAW_RED;
535   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
536     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
537       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
538       x_l = a->j[j]*bs; x_r = x_l + 1.0;
539       aa = a->a + j*bs2;
540       for ( k=0; k<bs; k++ ) {
541         for ( l=0; l<bs; l++ ) {
542           if (PetscReal(*aa++) <= 0.) continue;
543           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
544         }
545       }
546     }
547   }
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNC__
552 #define __FUNC__ "MatView_SeqBAIJ_Draw"
553 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
554 {
555   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
556   int          ierr;
557   double       xl,yl,xr,yr,w,h;
558   Draw         draw;
559   PetscTruth   isnull;
560 
561   PetscFunctionBegin;
562 
563   ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
564   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
565 
566   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
567   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
568   xr += w;    yr += h;  xl = -w;     yl = -h;
569   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
570   ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A); CHKERRQ(ierr);
571   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNC__
576 #define __FUNC__ "MatView_SeqBAIJ"
577 int MatView_SeqBAIJ(Mat A,Viewer viewer)
578 {
579   ViewerType  vtype;
580   int         ierr;
581 
582   PetscFunctionBegin;
583   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
584   if (PetscTypeCompare(vtype,SOCKET_VIEWER)) {
585     SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported");
586   } else if (PetscTypeCompare(vtype,ASCII_VIEWER)){
587     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
588   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
589     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
590   } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
591     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
592   } else {
593     SETERRQ(1,1,"Viewer type not supported by PETSc object");
594   }
595   PetscFunctionReturn(0);
596 }
597 
598 
599 #undef __FUNC__
600 #define __FUNC__ "MatGetValues_SeqBAIJ"
601 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
602 {
603   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
604   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
605   int        *ai = a->i, *ailen = a->ilen;
606   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
607   MatScalar  *ap, *aa = a->a, zero = 0.0;
608 
609   PetscFunctionBegin;
610   for ( k=0; k<m; k++ ) { /* loop over rows */
611     row  = im[k]; brow = row/bs;
612     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
613     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
614     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
615     nrow = ailen[brow];
616     for ( l=0; l<n; l++ ) { /* loop over columns */
617       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
618       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
619       col  = in[l] ;
620       bcol = col/bs;
621       cidx = col%bs;
622       ridx = row%bs;
623       high = nrow;
624       low  = 0; /* assume unsorted */
625       while (high-low > 5) {
626         t = (low+high)/2;
627         if (rp[t] > bcol) high = t;
628         else             low  = t;
629       }
630       for ( i=low; i<high; i++ ) {
631         if (rp[i] > bcol) break;
632         if (rp[i] == bcol) {
633           *v++ = ap[bs2*i+bs*cidx+ridx];
634           goto finished;
635         }
636       }
637       *v++ = zero;
638       finished:;
639     }
640   }
641   PetscFunctionReturn(0);
642 }
643 
644 
645 #undef __FUNC__
646 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
647 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
648 {
649   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
650   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
651   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented;
652   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval;
653   Scalar      *value = v;
654   MatScalar   *ap,*aa=a->a,*bap;
655 
656   PetscFunctionBegin;
657   if (roworiented) {
658     stepval = (n-1)*bs;
659   } else {
660     stepval = (m-1)*bs;
661   }
662   for ( k=0; k<m; k++ ) { /* loop over added rows */
663     row  = im[k];
664     if (row < 0) continue;
665 #if defined(USE_PETSC_BOPT_g)
666     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
667 #endif
668     rp   = aj + ai[row];
669     ap   = aa + bs2*ai[row];
670     rmax = imax[row];
671     nrow = ailen[row];
672     low  = 0;
673     for ( l=0; l<n; l++ ) { /* loop over added columns */
674       if (in[l] < 0) continue;
675 #if defined(USE_PETSC_BOPT_g)
676       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
677 #endif
678       col = in[l];
679       if (roworiented) {
680         value = v + k*(stepval+bs)*bs + l*bs;
681       } else {
682         value = v + l*(stepval+bs)*bs + k*bs;
683       }
684       if (!sorted) low = 0; high = nrow;
685       while (high-low > 7) {
686         t = (low+high)/2;
687         if (rp[t] > col) high = t;
688         else             low  = t;
689       }
690       for ( i=low; i<high; i++ ) {
691         if (rp[i] > col) break;
692         if (rp[i] == col) {
693           bap  = ap +  bs2*i;
694           if (roworiented) {
695             if (is == ADD_VALUES) {
696               for ( ii=0; ii<bs; ii++,value+=stepval ) {
697                 for (jj=ii; jj<bs2; jj+=bs ) {
698                   bap[jj] += *value++;
699                 }
700               }
701             } else {
702               for ( ii=0; ii<bs; ii++,value+=stepval ) {
703                 for (jj=ii; jj<bs2; jj+=bs ) {
704                   bap[jj] = *value++;
705                 }
706               }
707             }
708           } else {
709             if (is == ADD_VALUES) {
710               for ( ii=0; ii<bs; ii++,value+=stepval ) {
711                 for (jj=0; jj<bs; jj++ ) {
712                   *bap++ += *value++;
713                 }
714               }
715             } else {
716               for ( ii=0; ii<bs; ii++,value+=stepval ) {
717                 for (jj=0; jj<bs; jj++ ) {
718                   *bap++  = *value++;
719                 }
720               }
721             }
722           }
723           goto noinsert2;
724         }
725       }
726       if (nonew == 1) goto noinsert2;
727       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
728       if (nrow >= rmax) {
729         /* there is no extra room in row, therefore enlarge */
730         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
731         MatScalar *new_a;
732 
733         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
734 
735         /* malloc new storage space */
736         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
737         new_a   = (MatScalar *) PetscMalloc( len ); CHKPTRQ(new_a);
738         new_j   = (int *) (new_a + bs2*new_nz);
739         new_i   = new_j + new_nz;
740 
741         /* copy over old data into new slots */
742         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
743         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
744         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
745         len = (new_nz - CHUNKSIZE - ai[row] - nrow);
746         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
747         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));
748         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
749         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));
750         /* free up old matrix storage */
751         PetscFree(a->a);
752         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
753         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
754         a->singlemalloc = 1;
755 
756         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
757         rmax = imax[row] = imax[row] + CHUNKSIZE;
758         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
759         a->maxnz += bs2*CHUNKSIZE;
760         a->reallocs++;
761         a->nz++;
762       }
763       N = nrow++ - 1;
764       /* shift up all the later entries in this row */
765       for ( ii=N; ii>=i; ii-- ) {
766         rp[ii+1] = rp[ii];
767         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
768       }
769       if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
770       rp[i] = col;
771       bap   = ap +  bs2*i;
772       if (roworiented) {
773         for ( ii=0; ii<bs; ii++,value+=stepval) {
774           for (jj=ii; jj<bs2; jj+=bs ) {
775             bap[jj] = *value++;
776           }
777         }
778       } else {
779         for ( ii=0; ii<bs; ii++,value+=stepval ) {
780           for (jj=0; jj<bs; jj++ ) {
781             *bap++  = *value++;
782           }
783         }
784       }
785       noinsert2:;
786       low = i;
787     }
788     ailen[row] = nrow;
789   }
790   PetscFunctionReturn(0);
791 }
792 
793 
794 #undef __FUNC__
795 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
796 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
797 {
798   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
799   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
800   int        m = a->m,*ip, N, *ailen = a->ilen;
801   int        mbs = a->mbs, bs2 = a->bs2,rmax = 0;
802   MatScalar  *aa = a->a, *ap;
803 
804   PetscFunctionBegin;
805   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
806 
807   if (m) rmax = ailen[0];
808   for ( i=1; i<mbs; i++ ) {
809     /* move each row back by the amount of empty slots (fshift) before it*/
810     fshift += imax[i-1] - ailen[i-1];
811     rmax   = PetscMax(rmax,ailen[i]);
812     if (fshift) {
813       ip = aj + ai[i]; ap = aa + bs2*ai[i];
814       N = ailen[i];
815       for ( j=0; j<N; j++ ) {
816         ip[j-fshift] = ip[j];
817         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
818       }
819     }
820     ai[i] = ai[i-1] + ailen[i-1];
821   }
822   if (mbs) {
823     fshift += imax[mbs-1] - ailen[mbs-1];
824     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
825   }
826   /* reset ilen and imax for each row */
827   for ( i=0; i<mbs; i++ ) {
828     ailen[i] = imax[i] = ai[i+1] - ai[i];
829   }
830   a->nz = ai[mbs];
831 
832   /* diagonals may have moved, so kill the diagonal pointers */
833   if (fshift && a->diag) {
834     PetscFree(a->diag);
835     PLogObjectMemory(A,-(m+1)*sizeof(int));
836     a->diag = 0;
837   }
838   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
839            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
840   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
841            a->reallocs);
842   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
843   a->reallocs          = 0;
844   A->info.nz_unneeded  = (double)fshift*bs2;
845 
846   PetscFunctionReturn(0);
847 }
848 
849 
850 
851 /*
852    This function returns an array of flags which indicate the locations of contiguous
853    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
854    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
855    Assume: sizes should be long enough to hold all the values.
856 */
857 #undef __FUNC__
858 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks"
859 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
860 {
861   int i,j,k,row;
862   PetscTruth flg;
863 
864   /*   PetscFunctionBegin;*/
865   for ( i=0,j=0; i<n; j++ ) {
866     row = idx[i];
867     if (row%bs!=0) { /* Not the begining of a block */
868       sizes[j] = 1;
869       i++;
870     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
871       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
872       i++;
873     } else { /* Begining of the block, so check if the complete block exists */
874       flg = PETSC_TRUE;
875       for ( k=1; k<bs; k++ ) {
876         if (row+k != idx[i+k]) { /* break in the block */
877           flg = PETSC_FALSE;
878           break;
879         }
880       }
881       if (flg == PETSC_TRUE) { /* No break in the bs */
882         sizes[j] = bs;
883         i+= bs;
884       } else {
885         sizes[j] = 1;
886         i++;
887       }
888     }
889   }
890   *bs_max = j;
891   PetscFunctionReturn(0);
892 }
893 
894 #undef __FUNC__
895 #define __FUNC__ "MatZeroRows_SeqBAIJ"
896 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
897 {
898   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
899   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
900   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
901   Scalar      zero = 0.0;
902   MatScalar   *aa;
903 
904   PetscFunctionBegin;
905   /* Make a copy of the IS and  sort it */
906   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
907   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
908 
909   /* allocate memory for rows,sizes */
910   rows  = (int*)PetscMalloc((3*is_n+1)*sizeof(int)); CHKPTRQ(rows);
911   sizes = rows + is_n;
912 
913   /* initialize copy IS valurs to rows, and sort them */
914   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
915   ierr = PetscSortInt(is_n,rows); CHKERRQ(ierr);
916   ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max); CHKERRQ(ierr);
917   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
918 
919   for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) {
920     row   = rows[j];
921     if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row);
922     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
923     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
924     if (sizes[i] == bs) {
925       if (diag) {
926         if (baij->ilen[row/bs] > 0) {
927           baij->ilen[row/bs] = 1;
928           baij->j[baij->i[row/bs]] = row/bs;
929           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar)); CHKERRQ(ierr);
930         }
931         /* Now insert all the diagoanl values for this bs */
932         for ( k=0; k<bs; k++ ) {
933           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
934         }
935       } else { /* (!diag) */
936         baij->ilen[row/bs] = 0;
937       } /* end (!diag) */
938     } else { /* (sizes[i] != bs) */
939 #if defined (USE_PETSC_DEBUG)
940       if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1");
941 #endif
942       for ( k=0; k<count; k++ ) {
943         aa[0] = zero;
944         aa+=bs;
945       }
946       if (diag) {
947         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
948       }
949     }
950   }
951 
952   PetscFree(rows);
953   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
954   PetscFunctionReturn(0);
955 }
956 
957 #undef __FUNC__
958 #define __FUNC__ "MatSetValues_SeqBAIJ"
959 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
960 {
961   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
962   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
963   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
964   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
965   int         ridx,cidx,bs2=a->bs2;
966   MatScalar   *ap,value,*aa=a->a,*bap;
967 
968   PetscFunctionBegin;
969   for ( k=0; k<m; k++ ) { /* loop over added rows */
970     row  = im[k]; brow = row/bs;
971     if (row < 0) continue;
972 #if defined(USE_PETSC_BOPT_g)
973     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
974 #endif
975     rp   = aj + ai[brow];
976     ap   = aa + bs2*ai[brow];
977     rmax = imax[brow];
978     nrow = ailen[brow];
979     low  = 0;
980     for ( l=0; l<n; l++ ) { /* loop over added columns */
981       if (in[l] < 0) continue;
982 #if defined(USE_PETSC_BOPT_g)
983       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
984 #endif
985       col = in[l]; bcol = col/bs;
986       ridx = row % bs; cidx = col % bs;
987       if (roworiented) {
988         value = v[l + k*n];
989       } else {
990         value = v[k + l*m];
991       }
992       if (!sorted) low = 0; high = nrow;
993       while (high-low > 7) {
994         t = (low+high)/2;
995         if (rp[t] > bcol) high = t;
996         else              low  = t;
997       }
998       for ( i=low; i<high; i++ ) {
999         if (rp[i] > bcol) break;
1000         if (rp[i] == bcol) {
1001           bap  = ap +  bs2*i + bs*cidx + ridx;
1002           if (is == ADD_VALUES) *bap += value;
1003           else                  *bap  = value;
1004           goto noinsert1;
1005         }
1006       }
1007       if (nonew == 1) goto noinsert1;
1008       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1009       if (nrow >= rmax) {
1010         /* there is no extra room in row, therefore enlarge */
1011         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1012         MatScalar *new_a;
1013 
1014         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1015 
1016         /* Malloc new storage space */
1017         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1018         new_a   = (MatScalar *) PetscMalloc( len ); CHKPTRQ(new_a);
1019         new_j   = (int *) (new_a + bs2*new_nz);
1020         new_i   = new_j + new_nz;
1021 
1022         /* copy over old data into new slots */
1023         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
1024         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1025         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
1026         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1027         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
1028                                                            len*sizeof(int));
1029         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
1030         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
1031         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
1032                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
1033         /* free up old matrix storage */
1034         PetscFree(a->a);
1035         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
1036         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1037         a->singlemalloc = 1;
1038 
1039         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1040         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1041         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1042         a->maxnz += bs2*CHUNKSIZE;
1043         a->reallocs++;
1044         a->nz++;
1045       }
1046       N = nrow++ - 1;
1047       /* shift up all the later entries in this row */
1048       for ( ii=N; ii>=i; ii-- ) {
1049         rp[ii+1] = rp[ii];
1050         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1051       }
1052       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1053       rp[i]                      = bcol;
1054       ap[bs2*i + bs*cidx + ridx] = value;
1055       noinsert1:;
1056       low = i;
1057     }
1058     ailen[brow] = nrow;
1059   }
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1064 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1065 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1066 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1067 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1068 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec);
1069 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1070 extern int MatScale_SeqBAIJ(Scalar*,Mat);
1071 extern int MatNorm_SeqBAIJ(Mat,NormType,double *);
1072 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*);
1073 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1074 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1075 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1076 extern int MatZeroEntries_SeqBAIJ(Mat);
1077 
1078 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1079 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1080 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1081 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1082 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1083 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1084 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1085 
1086 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1087 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1088 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1089 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1090 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1091 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1092 
1093 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1094 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1095 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1096 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1097 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1098 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1099 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1100 
1101 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1102 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1103 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1104 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1105 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1106 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1107 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1108 
1109 #undef __FUNC__
1110 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1111 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1112 {
1113   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1114   Mat         outA;
1115   int         ierr;
1116   PetscTruth  row_identity, col_identity;
1117 
1118   PetscFunctionBegin;
1119   if (info && info->fill != 0) SETERRQ(PETSC_ERR_SUP,0,"Only fill=0 supported");
1120   ierr = ISIdentity(row,&row_identity); CHKERRQ(ierr);
1121   ierr = ISIdentity(col,&col_identity); CHKERRQ(ierr);
1122   if (!row_identity || !col_identity) {
1123     SETERRQ(1,1,"Row and column permutations must be identity");
1124   }
1125 
1126   outA          = inA;
1127   inA->factor   = FACTOR_LU;
1128   a->row        = row;
1129   a->col        = col;
1130 
1131   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1132   ierr = ISInvertPermutation(col,&(a->icol)); CHKERRQ(ierr);
1133   PLogObjectParent(inA,a->icol);
1134 
1135   if (!a->solve_work) {
1136     a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1137     PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1138   }
1139 
1140   if (!a->diag) {
1141     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1142   }
1143   /*
1144       Blocksize 4 and 5 have a special faster factorization/solver for ILU(0) factorization
1145     with natural ordering
1146   */
1147   if (a->bs == 4) {
1148     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1149     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1150     PLogInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");
1151   } else if (a->bs == 5) {
1152     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1153     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1154     PLogInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");
1155   }
1156 
1157   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1158 
1159 
1160   PetscFunctionReturn(0);
1161 }
1162 #undef __FUNC__
1163 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1164 int MatPrintHelp_SeqBAIJ(Mat A)
1165 {
1166   static int called = 0;
1167   MPI_Comm   comm = A->comm;
1168 
1169   PetscFunctionBegin;
1170   if (called) {PetscFunctionReturn(0);} else called = 1;
1171   (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1172   (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 EXTERN_C_BEGIN
1177 #undef __FUNC__
1178 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1179 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1180 {
1181   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1182   int         i,nz,n;
1183 
1184   PetscFunctionBegin;
1185   nz = baij->maxnz;
1186   n  = baij->n;
1187   for (i=0; i<nz; i++) {
1188     baij->j[i] = indices[i];
1189   }
1190   baij->nz = nz;
1191   for ( i=0; i<n; i++ ) {
1192     baij->ilen[i] = baij->imax[i];
1193   }
1194 
1195   PetscFunctionReturn(0);
1196 }
1197 EXTERN_C_END
1198 
1199 #undef __FUNC__
1200 #define __FUNC__ "MatSeqBAIJSetColumnIndices"
1201 /*@
1202     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1203        in the matrix.
1204 
1205   Input Parameters:
1206 +  mat - the SeqBAIJ matrix
1207 -  indices - the column indices
1208 
1209   Notes:
1210     This can be called if you have precomputed the nonzero structure of the
1211   matrix and want to provide it to the matrix object to improve the performance
1212   of the MatSetValues() operation.
1213 
1214     You MUST have set the correct numbers of nonzeros per row in the call to
1215   MatCreateSeqBAIJ().
1216 
1217     MUST be called before any calls to MatSetValues();
1218 
1219 @*/
1220 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1221 {
1222   int ierr,(*f)(Mat,int *);
1223 
1224   PetscFunctionBegin;
1225   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1226   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);
1227          CHKERRQ(ierr);
1228   if (f) {
1229     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1230   } else {
1231     SETERRQ(1,1,"Wrong type of matrix to set column indices");
1232   }
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 /* -------------------------------------------------------------------*/
1237 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1238        MatGetRow_SeqBAIJ,
1239        MatRestoreRow_SeqBAIJ,
1240        MatMult_SeqBAIJ_N,
1241        MatMultAdd_SeqBAIJ_N,
1242        MatMultTrans_SeqBAIJ,
1243        MatMultTransAdd_SeqBAIJ,
1244        MatSolve_SeqBAIJ_N,
1245        0,
1246        0,
1247        0,
1248        MatLUFactor_SeqBAIJ,
1249        0,
1250        0,
1251        MatTranspose_SeqBAIJ,
1252        MatGetInfo_SeqBAIJ,
1253        MatEqual_SeqBAIJ,
1254        MatGetDiagonal_SeqBAIJ,
1255        MatDiagonalScale_SeqBAIJ,
1256        MatNorm_SeqBAIJ,
1257        0,
1258        MatAssemblyEnd_SeqBAIJ,
1259        0,
1260        MatSetOption_SeqBAIJ,
1261        MatZeroEntries_SeqBAIJ,
1262        MatZeroRows_SeqBAIJ,
1263        MatLUFactorSymbolic_SeqBAIJ,
1264        MatLUFactorNumeric_SeqBAIJ_N,
1265        0,
1266        0,
1267        MatGetSize_SeqBAIJ,
1268        MatGetSize_SeqBAIJ,
1269        MatGetOwnershipRange_SeqBAIJ,
1270        MatILUFactorSymbolic_SeqBAIJ,
1271        0,
1272        0,
1273        0,
1274        MatDuplicate_SeqBAIJ,
1275        0,
1276        0,
1277        MatILUFactor_SeqBAIJ,
1278        0,
1279        0,
1280        MatGetSubMatrices_SeqBAIJ,
1281        MatIncreaseOverlap_SeqBAIJ,
1282        MatGetValues_SeqBAIJ,
1283        0,
1284        MatPrintHelp_SeqBAIJ,
1285        MatScale_SeqBAIJ,
1286        0,
1287        0,
1288        0,
1289        MatGetBlockSize_SeqBAIJ,
1290        MatGetRowIJ_SeqBAIJ,
1291        MatRestoreRowIJ_SeqBAIJ,
1292        0,
1293        0,
1294        0,
1295        0,
1296        0,
1297        0,
1298        MatSetValuesBlocked_SeqBAIJ,
1299        MatGetSubMatrix_SeqBAIJ,
1300        0,
1301        0,
1302        MatGetMaps_Petsc};
1303 
1304 #undef __FUNC__
1305 #define __FUNC__ "MatCreateSeqBAIJ"
1306 /*@C
1307    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1308    compressed row) format.  For good matrix assembly performance the
1309    user should preallocate the matrix storage by setting the parameter nz
1310    (or the array nzz).  By setting these parameters accurately, performance
1311    during matrix assembly can be increased by more than a factor of 50.
1312 
1313    Collective on MPI_Comm
1314 
1315    Input Parameters:
1316 +  comm - MPI communicator, set to PETSC_COMM_SELF
1317 .  bs - size of block
1318 .  m - number of rows
1319 .  n - number of columns
1320 .  nz - number of block nonzeros per block row (same for all rows)
1321 -  nzz - array containing the number of block nonzeros in the various block rows
1322          (possibly different for each block row) or PETSC_NULL
1323 
1324    Output Parameter:
1325 .  A - the matrix
1326 
1327    Options Database Keys:
1328 .   -mat_no_unroll - uses code that does not unroll the loops in the
1329                      block calculations (much slower)
1330 .    -mat_block_size - size of the blocks to use
1331 
1332    Notes:
1333    The block AIJ format is fully compatible with standard Fortran 77
1334    storage.  That is, the stored row and column indices can begin at
1335    either one (as in Fortran) or zero.  See the users' manual for details.
1336 
1337    Specify the preallocated storage with either nz or nnz (not both).
1338    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1339    allocation.  For additional details, see the users manual chapter on
1340    matrices.
1341 
1342    Level: intermediate
1343 
1344 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1345 @*/
1346 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1347 {
1348   Mat         B;
1349   Mat_SeqBAIJ *b;
1350   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
1351 
1352   PetscFunctionBegin;
1353   MPI_Comm_size(comm,&size);
1354   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1");
1355 
1356   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr);
1357 
1358   if (mbs*bs!=m || nbs*bs!=n) {
1359     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize");
1360   }
1361 
1362   *A = 0;
1363   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView);
1364   PLogObjectCreate(B);
1365   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1366   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1367   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1368   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1369   if (!flg) {
1370     switch (bs) {
1371     case 1:
1372       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1373       B->ops->solve           = MatSolve_SeqBAIJ_1;
1374       B->ops->mult            = MatMult_SeqBAIJ_1;
1375       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1376       break;
1377     case 2:
1378       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1379       B->ops->solve           = MatSolve_SeqBAIJ_2;
1380       B->ops->mult            = MatMult_SeqBAIJ_2;
1381       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1382       break;
1383     case 3:
1384       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1385       B->ops->solve           = MatSolve_SeqBAIJ_3;
1386       B->ops->mult            = MatMult_SeqBAIJ_3;
1387       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1388       break;
1389     case 4:
1390       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1391       B->ops->solve           = MatSolve_SeqBAIJ_4;
1392       B->ops->mult            = MatMult_SeqBAIJ_4;
1393       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1394       break;
1395     case 5:
1396       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1397       B->ops->solve           = MatSolve_SeqBAIJ_5;
1398       B->ops->mult            = MatMult_SeqBAIJ_5;
1399       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1400       break;
1401     case 7:
1402       B->ops->mult            = MatMult_SeqBAIJ_7;
1403       B->ops->solve           = MatSolve_SeqBAIJ_7;
1404       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1405       break;
1406     }
1407   }
1408   B->ops->destroy     = MatDestroy_SeqBAIJ;
1409   B->ops->view        = MatView_SeqBAIJ;
1410   B->factor           = 0;
1411   B->lupivotthreshold = 1.0;
1412   B->mapping          = 0;
1413   b->row              = 0;
1414   b->col              = 0;
1415   b->icol             = 0;
1416   b->reallocs         = 0;
1417 
1418   b->m       = m; B->m = m; B->M = m;
1419   b->n       = n; B->n = n; B->N = n;
1420 
1421   ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr);
1422   ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr);
1423 
1424   b->mbs     = mbs;
1425   b->nbs     = nbs;
1426   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1427   if (nnz == PETSC_NULL) {
1428     if (nz == PETSC_DEFAULT) nz = 5;
1429     else if (nz <= 0)        nz = 1;
1430     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1431     nz = nz*mbs;
1432   } else {
1433     nz = 0;
1434     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1435   }
1436 
1437   /* allocate the matrix space */
1438   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int);
1439   b->a  = (MatScalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1440   PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
1441   b->j  = (int *) (b->a + nz*bs2);
1442   PetscMemzero(b->j,nz*sizeof(int));
1443   b->i  = b->j + nz;
1444   b->singlemalloc = 1;
1445 
1446   b->i[0] = 0;
1447   for (i=1; i<mbs+1; i++) {
1448     b->i[i] = b->i[i-1] + b->imax[i-1];
1449   }
1450 
1451   /* b->ilen will count nonzeros in each block row so far. */
1452   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1453   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1454   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1455 
1456   b->bs               = bs;
1457   b->bs2              = bs2;
1458   b->mbs              = mbs;
1459   b->nz               = 0;
1460   b->maxnz            = nz*bs2;
1461   b->sorted           = 0;
1462   b->roworiented      = 1;
1463   b->nonew            = 0;
1464   b->diag             = 0;
1465   b->solve_work       = 0;
1466   b->mult_work        = 0;
1467   b->spptr            = 0;
1468   B->info.nz_unneeded = (double)b->maxnz;
1469 
1470   *A = B;
1471   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1472   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1473 
1474   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1475                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1476                                      (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 #undef __FUNC__
1481 #define __FUNC__ "MatDuplicate_SeqBAIJ"
1482 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1483 {
1484   Mat         C;
1485   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1486   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1487 
1488   PetscFunctionBegin;
1489   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix");
1490 
1491   *B = 0;
1492   PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView);
1493   PLogObjectCreate(C);
1494   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1495   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
1496   C->ops->destroy    = MatDestroy_SeqBAIJ;
1497   C->ops->view       = MatView_SeqBAIJ;
1498   C->factor          = A->factor;
1499   c->row             = 0;
1500   c->col             = 0;
1501   C->assembled       = PETSC_TRUE;
1502 
1503   c->m = C->m   = a->m;
1504   c->n = C->n   = a->n;
1505   C->M          = a->m;
1506   C->N          = a->n;
1507 
1508   c->bs         = a->bs;
1509   c->bs2        = a->bs2;
1510   c->mbs        = a->mbs;
1511   c->nbs        = a->nbs;
1512 
1513   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1514   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1515   for ( i=0; i<mbs; i++ ) {
1516     c->imax[i] = a->imax[i];
1517     c->ilen[i] = a->ilen[i];
1518   }
1519 
1520   /* allocate the matrix space */
1521   c->singlemalloc = 1;
1522   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1523   c->a  = (MatScalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1524   c->j  = (int *) (c->a + nz*bs2);
1525   c->i  = c->j + nz;
1526   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1527   if (mbs > 0) {
1528     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1529     if (cpvalues == MAT_COPY_VALUES) {
1530       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
1531     } else {
1532       PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
1533     }
1534   }
1535 
1536   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1537   c->sorted      = a->sorted;
1538   c->roworiented = a->roworiented;
1539   c->nonew       = a->nonew;
1540 
1541   if (a->diag) {
1542     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1543     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1544     for ( i=0; i<mbs; i++ ) {
1545       c->diag[i] = a->diag[i];
1546     }
1547   }
1548   else c->diag          = 0;
1549   c->nz                 = a->nz;
1550   c->maxnz              = a->maxnz;
1551   c->solve_work         = 0;
1552   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1553   c->mult_work          = 0;
1554   *B = C;
1555   ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1556   PetscFunctionReturn(0);
1557 }
1558 
1559 #undef __FUNC__
1560 #define __FUNC__ "MatLoad_SeqBAIJ"
1561 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1562 {
1563   Mat_SeqBAIJ  *a;
1564   Mat          B;
1565   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1566   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1567   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1568   int          *masked, nmask,tmp,bs2,ishift;
1569   Scalar       *aa;
1570   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1571 
1572   PetscFunctionBegin;
1573   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1574   bs2  = bs*bs;
1575 
1576   MPI_Comm_size(comm,&size);
1577   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor");
1578   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1579   ierr = PetscBinaryRead(fd,header,4,PETSC_INT); CHKERRQ(ierr);
1580   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object");
1581   M = header[1]; N = header[2]; nz = header[3];
1582 
1583   if (header[3] < 0) {
1584     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ");
1585   }
1586 
1587   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
1588 
1589   /*
1590      This code adds extra rows to make sure the number of rows is
1591     divisible by the blocksize
1592   */
1593   mbs        = M/bs;
1594   extra_rows = bs - M + bs*(mbs);
1595   if (extra_rows == bs) extra_rows = 0;
1596   else                  mbs++;
1597   if (extra_rows) {
1598     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1599   }
1600 
1601   /* read in row lengths */
1602   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1603   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
1604   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1605 
1606   /* read in column indices */
1607   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1608   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT); CHKERRQ(ierr);
1609   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1610 
1611   /* loop over row lengths determining block row lengths */
1612   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1613   PetscMemzero(browlengths,mbs*sizeof(int));
1614   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1615   PetscMemzero(mask,mbs*sizeof(int));
1616   masked = mask + mbs;
1617   rowcount = 0; nzcount = 0;
1618   for ( i=0; i<mbs; i++ ) {
1619     nmask = 0;
1620     for ( j=0; j<bs; j++ ) {
1621       kmax = rowlengths[rowcount];
1622       for ( k=0; k<kmax; k++ ) {
1623         tmp = jj[nzcount++]/bs;
1624         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1625       }
1626       rowcount++;
1627     }
1628     browlengths[i] += nmask;
1629     /* zero out the mask elements we set */
1630     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1631   }
1632 
1633   /* create our matrix */
1634   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1635   B = *A;
1636   a = (Mat_SeqBAIJ *) B->data;
1637 
1638   /* set matrix "i" values */
1639   a->i[0] = 0;
1640   for ( i=1; i<= mbs; i++ ) {
1641     a->i[i]      = a->i[i-1] + browlengths[i-1];
1642     a->ilen[i-1] = browlengths[i-1];
1643   }
1644   a->nz         = 0;
1645   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1646 
1647   /* read in nonzero values */
1648   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1649   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR); CHKERRQ(ierr);
1650   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1651 
1652   /* set "a" and "j" values into matrix */
1653   nzcount = 0; jcount = 0;
1654   for ( i=0; i<mbs; i++ ) {
1655     nzcountb = nzcount;
1656     nmask    = 0;
1657     for ( j=0; j<bs; j++ ) {
1658       kmax = rowlengths[i*bs+j];
1659       for ( k=0; k<kmax; k++ ) {
1660         tmp = jj[nzcount++]/bs;
1661 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1662       }
1663       rowcount++;
1664     }
1665     /* sort the masked values */
1666     PetscSortInt(nmask,masked);
1667 
1668     /* set "j" values into matrix */
1669     maskcount = 1;
1670     for ( j=0; j<nmask; j++ ) {
1671       a->j[jcount++]  = masked[j];
1672       mask[masked[j]] = maskcount++;
1673     }
1674     /* set "a" values into matrix */
1675     ishift = bs2*a->i[i];
1676     for ( j=0; j<bs; j++ ) {
1677       kmax = rowlengths[i*bs+j];
1678       for ( k=0; k<kmax; k++ ) {
1679         tmp       = jj[nzcountb]/bs ;
1680         block     = mask[tmp] - 1;
1681         point     = jj[nzcountb] - bs*tmp;
1682         idx       = ishift + bs2*block + j + bs*point;
1683         a->a[idx] = aa[nzcountb++];
1684       }
1685     }
1686     /* zero out the mask elements we set */
1687     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1688   }
1689   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix");
1690 
1691   PetscFree(rowlengths);
1692   PetscFree(browlengths);
1693   PetscFree(aa);
1694   PetscFree(jj);
1695   PetscFree(mask);
1696 
1697   B->assembled = PETSC_TRUE;
1698 
1699   ierr = MatView_Private(B); CHKERRQ(ierr);
1700   PetscFunctionReturn(0);
1701 }
1702 
1703 
1704 
1705