xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 4ccb76dd2a06ade17c2800eec40bc3f6b9abd66f)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: baij.c,v 1.177 1999/05/12 03:29:44 bsmith Exp balay $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "sys.h"
10 #include "src/mat/impls/baij/seq/baij.h"
11 #include "src/vec/vecimpl.h"
12 #include "src/inline/spops.h"
13 
14 #define CHUNKSIZE  10
15 
16 /*
17      Checks for missing diagonals
18 */
19 #undef __FUNC__
20 #define __FUNC__ "MatMissingDiag_SeqBAIJ"
21 int MatMissingDiag_SeqBAIJ(Mat A)
22 {
23   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
24   int         *diag = a->diag, *jj = a->j,i;
25 
26   PetscFunctionBegin;
27   for ( i=0; i<a->mbs; i++ ) {
28     if (jj[diag[i]] != i) {
29       SETERRQ1(1,1,"Matrix is missing diagonal number %d",i);
30     }
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 #undef __FUNC__
36 #define __FUNC__ "MatMarkDiag_SeqBAIJ"
37 int MatMarkDiag_SeqBAIJ(Mat A)
38 {
39   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
40   int         i,j, *diag, m = a->mbs;
41 
42   PetscFunctionBegin;
43   diag = (int *) PetscMalloc( (m+1)*sizeof(int));CHKPTRQ(diag);
44   PLogObjectMemory(A,(m+1)*sizeof(int));
45   for ( i=0; i<m; i++ ) {
46     diag[i] = a->i[i+1];
47     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
48       if (a->j[j] == i) {
49         diag[i] = j;
50         break;
51       }
52     }
53   }
54   a->diag = diag;
55   PetscFunctionReturn(0);
56 }
57 
58 
59 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
60 
61 #undef __FUNC__
62 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
63 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
64                             PetscTruth *done)
65 {
66   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
67   int         ierr, n = a->mbs,i;
68 
69   PetscFunctionBegin;
70   *nn = n;
71   if (!ia) PetscFunctionReturn(0);
72   if (symmetric) {
73     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
74   } else if (oshift == 1) {
75     /* temporarily add 1 to i and j indices */
76     int nz = a->i[n] + 1;
77     for ( i=0; i<nz; i++ ) a->j[i]++;
78     for ( i=0; i<n+1; i++ ) a->i[i]++;
79     *ia = a->i; *ja = a->j;
80   } else {
81     *ia = a->i; *ja = a->j;
82   }
83 
84   PetscFunctionReturn(0);
85 }
86 
87 #undef __FUNC__
88 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
89 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
90                                 PetscTruth *done)
91 {
92   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
93   int         i,n = a->mbs;
94 
95   PetscFunctionBegin;
96   if (!ia) PetscFunctionReturn(0);
97   if (symmetric) {
98     PetscFree(*ia);
99     PetscFree(*ja);
100   } else if (oshift == 1) {
101     int nz = a->i[n];
102     for ( i=0; i<nz; i++ ) a->j[i]--;
103     for ( i=0; i<n+1; i++ ) a->i[i]--;
104   }
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNC__
109 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
110 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
111 {
112   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
113 
114   PetscFunctionBegin;
115   *bs = baij->bs;
116   PetscFunctionReturn(0);
117 }
118 
119 
120 #undef __FUNC__
121 #define __FUNC__ "MatDestroy_SeqBAIJ"
122 int MatDestroy_SeqBAIJ(Mat A)
123 {
124   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
125   int         ierr;
126 
127   if (--A->refct > 0) PetscFunctionReturn(0);
128 
129   if (A->mapping) {
130     ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr);
131   }
132   if (A->bmapping) {
133     ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr);
134   }
135   if (A->rmap) {
136     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
137   }
138   if (A->cmap) {
139     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
140   }
141 #if defined(PETSC_USE_LOG)
142   PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
143 #endif
144   PetscFree(a->a);
145   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
146   if (a->diag) PetscFree(a->diag);
147   if (a->ilen) PetscFree(a->ilen);
148   if (a->imax) PetscFree(a->imax);
149   if (a->solve_work) PetscFree(a->solve_work);
150   if (a->mult_work) PetscFree(a->mult_work);
151   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
152   if (a->saved_values) PetscFree(a->saved_values);
153   PetscFree(a);
154   PLogObjectDestroy(A);
155   PetscHeaderDestroy(A);
156   PetscFunctionReturn(0);
157 }
158 
159 #undef __FUNC__
160 #define __FUNC__ "MatSetOption_SeqBAIJ"
161 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
162 {
163   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
164 
165   PetscFunctionBegin;
166   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
167   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
168   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
169   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
170   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
171   else if (op == MAT_NEW_NONZERO_LOCATION_ERR)     a->nonew       = -1;
172   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR)   a->nonew       = -2;
173   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
174   else if (op == MAT_ROWS_SORTED ||
175            op == MAT_ROWS_UNSORTED ||
176            op == MAT_SYMMETRIC ||
177            op == MAT_STRUCTURALLY_SYMMETRIC ||
178            op == MAT_YES_NEW_DIAGONALS ||
179            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
180            op == MAT_USE_HASH_TABLE) {
181     PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
182   } else if (op == MAT_NO_NEW_DIAGONALS) {
183     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
184   } else {
185     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
186   }
187   PetscFunctionReturn(0);
188 }
189 
190 
191 #undef __FUNC__
192 #define __FUNC__ "MatGetSize_SeqBAIJ"
193 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
194 {
195   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
196 
197   PetscFunctionBegin;
198   if (m) *m = a->m;
199   if (n) *n = a->n;
200   PetscFunctionReturn(0);
201 }
202 
203 #undef __FUNC__
204 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
205 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
206 {
207   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
208 
209   PetscFunctionBegin;
210   *m = 0; *n = a->m;
211   PetscFunctionReturn(0);
212 }
213 
214 #undef __FUNC__
215 #define __FUNC__ "MatGetRow_SeqBAIJ"
216 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
217 {
218   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ *) A->data;
219   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
220   MatScalar    *aa,*aa_i;
221   Scalar       *v_i;
222 
223   PetscFunctionBegin;
224   bs  = a->bs;
225   ai  = a->i;
226   aj  = a->j;
227   aa  = a->a;
228   bs2 = a->bs2;
229 
230   if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range");
231 
232   bn  = row/bs;   /* Block number */
233   bp  = row % bs; /* Block Position */
234   M   = ai[bn+1] - ai[bn];
235   *nz = bs*M;
236 
237   if (v) {
238     *v = 0;
239     if (*nz) {
240       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) );CHKPTRQ(*v);
241       for ( i=0; i<M; i++ ) { /* for each block in the block row */
242         v_i  = *v + i*bs;
243         aa_i = aa + bs2*(ai[bn] + i);
244         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
245       }
246     }
247   }
248 
249   if (idx) {
250     *idx = 0;
251     if (*nz) {
252       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) );CHKPTRQ(*idx);
253       for ( i=0; i<M; i++ ) { /* for each block in the block row */
254         idx_i = *idx + i*bs;
255         itmp  = bs*aj[ai[bn] + i];
256         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
257       }
258     }
259   }
260   PetscFunctionReturn(0);
261 }
262 
263 #undef __FUNC__
264 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
265 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
266 {
267   PetscFunctionBegin;
268   if (idx) {if (*idx) PetscFree(*idx);}
269   if (v)   {if (*v)   PetscFree(*v);}
270   PetscFunctionReturn(0);
271 }
272 
273 #undef __FUNC__
274 #define __FUNC__ "MatTranspose_SeqBAIJ"
275 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
276 {
277   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
278   Mat         C;
279   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
280   int         *rows,*cols,bs2=a->bs2;
281   MatScalar   *array = a->a;
282 
283   PetscFunctionBegin;
284   if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place");
285   col  = (int *) PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col);
286   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
287 
288   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
289   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
290   PetscFree(col);
291   rows = (int *) PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows);
292   cols = rows + bs;
293   for ( i=0; i<mbs; i++ ) {
294     cols[0] = i*bs;
295     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
296     len = ai[i+1] - ai[i];
297     for ( j=0; j<len; j++ ) {
298       rows[0] = (*aj++)*bs;
299       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
300       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
301       array += bs2;
302     }
303   }
304   PetscFree(rows);
305 
306   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
307   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
308 
309   if (B != PETSC_NULL) {
310     *B = C;
311   } else {
312     PetscOps *Abops;
313     MatOps   Aops;
314 
315     /* This isn't really an in-place transpose */
316     PetscFree(a->a);
317     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
318     if (a->diag) PetscFree(a->diag);
319     if (a->ilen) PetscFree(a->ilen);
320     if (a->imax) PetscFree(a->imax);
321     if (a->solve_work) PetscFree(a->solve_work);
322     PetscFree(a);
323 
324 
325     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
326     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
327 
328     /*
329        This is horrible, horrible code. We need to keep the
330       A pointers for the bops and ops but copy everything
331       else from C.
332     */
333     Abops    = A->bops;
334     Aops     = A->ops;
335     ierr     = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr);
336     A->bops  = Abops;
337     A->ops   = Aops;
338     A->qlist = 0;
339 
340     PetscHeaderDestroy(C);
341   }
342   PetscFunctionReturn(0);
343 }
344 
345 #undef __FUNC__
346 #define __FUNC__ "MatView_SeqBAIJ_Binary"
347 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
348 {
349   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
350   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
351   Scalar      *aa;
352   FILE        *file;
353 
354   PetscFunctionBegin;
355   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
356   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
357   col_lens[0] = MAT_COOKIE;
358 
359   col_lens[1] = a->m;
360   col_lens[2] = a->n;
361   col_lens[3] = a->nz*bs2;
362 
363   /* store lengths of each row and write (including header) to file */
364   count = 0;
365   for ( i=0; i<a->mbs; i++ ) {
366     for ( j=0; j<bs; j++ ) {
367       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
368     }
369   }
370   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr);
371   PetscFree(col_lens);
372 
373   /* store column indices (zero start index) */
374   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) );CHKPTRQ(jj);
375   count = 0;
376   for ( i=0; i<a->mbs; i++ ) {
377     for ( j=0; j<bs; j++ ) {
378       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
379         for ( l=0; l<bs; l++ ) {
380           jj[count++] = bs*a->j[k] + l;
381         }
382       }
383     }
384   }
385   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
386   PetscFree(jj);
387 
388   /* store nonzero values */
389   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa);
390   count = 0;
391   for ( i=0; i<a->mbs; i++ ) {
392     for ( j=0; j<bs; j++ ) {
393       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
394         for ( l=0; l<bs; l++ ) {
395           aa[count++] = a->a[bs2*k + l*bs + j];
396         }
397       }
398     }
399   }
400   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
401   PetscFree(aa);
402 
403   ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
404   if (file) {
405     fprintf(file,"-matload_block_size %d\n",a->bs);
406   }
407   PetscFunctionReturn(0);
408 }
409 
410 #undef __FUNC__
411 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
412 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
413 {
414   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
415   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
416   FILE        *fd;
417   char        *outputname;
418 
419   PetscFunctionBegin;
420   ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
421   ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr);
422   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
423   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
424     ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
425   } else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
426     SETERRQ(PETSC_ERR_SUP,0,"Socket format not supported");
427   } else if (format == VIEWER_FORMAT_ASCII_COMMON) {
428     for ( i=0; i<a->mbs; i++ ) {
429       for ( j=0; j<bs; j++ ) {
430         fprintf(fd,"row %d:",i*bs+j);
431         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
432           for ( l=0; l<bs; l++ ) {
433 #if defined(PETSC_USE_COMPLEX)
434             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
435               fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
436                       PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));
437             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
438               fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
439                       PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));
440             } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
441               fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));
442             }
443 #else
444             if (a->a[bs2*k + l*bs + j] != 0.0) {
445               fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
446             }
447 #endif
448           }
449         }
450         fprintf(fd,"\n");
451       }
452     }
453   } else {
454     for ( i=0; i<a->mbs; i++ ) {
455       for ( j=0; j<bs; j++ ) {
456         fprintf(fd,"row %d:",i*bs+j);
457         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
458           for ( l=0; l<bs; l++ ) {
459 #if defined(PETSC_USE_COMPLEX)
460             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) {
461               fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
462                 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));
463             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) {
464               fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
465                 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));
466             } else {
467               fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));
468             }
469 #else
470             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
471 #endif
472           }
473         }
474         fprintf(fd,"\n");
475       }
476     }
477   }
478   fflush(fd);
479   PetscFunctionReturn(0);
480 }
481 
482 #undef __FUNC__
483 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom"
484 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa)
485 {
486   Mat          A = (Mat) Aa;
487   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
488   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
489   double       xl,yl,xr,yr,x_l,x_r,y_l,y_r;
490   MatScalar    *aa;
491   MPI_Comm     comm;
492   Viewer       viewer;
493 
494   PetscFunctionBegin;
495   /*
496       This is nasty. If this is called from an originally parallel matrix
497    then all processes call this, but only the first has the matrix so the
498    rest should return immediately.
499   */
500   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
501   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
502   if (rank) PetscFunctionReturn(0);
503 
504   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr);
505 
506   ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
507 
508   /* loop over matrix elements drawing boxes */
509   color = DRAW_BLUE;
510   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
511     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
512       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
513       x_l = a->j[j]*bs; x_r = x_l + 1.0;
514       aa = a->a + j*bs2;
515       for ( k=0; k<bs; k++ ) {
516         for ( l=0; l<bs; l++ ) {
517           if (PetscReal(*aa++) >=  0.) continue;
518           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
519         }
520       }
521     }
522   }
523   color = DRAW_CYAN;
524   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
525     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
526       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
527       x_l = a->j[j]*bs; x_r = x_l + 1.0;
528       aa = a->a + j*bs2;
529       for ( k=0; k<bs; k++ ) {
530         for ( l=0; l<bs; l++ ) {
531           if (PetscReal(*aa++) != 0.) continue;
532           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
533         }
534       }
535     }
536   }
537 
538   color = DRAW_RED;
539   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
540     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
541       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
542       x_l = a->j[j]*bs; x_r = x_l + 1.0;
543       aa = a->a + j*bs2;
544       for ( k=0; k<bs; k++ ) {
545         for ( l=0; l<bs; l++ ) {
546           if (PetscReal(*aa++) <= 0.) continue;
547           DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
548         }
549       }
550     }
551   }
552   PetscFunctionReturn(0);
553 }
554 
555 #undef __FUNC__
556 #define __FUNC__ "MatView_SeqBAIJ_Draw"
557 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
558 {
559   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
560   int          ierr;
561   double       xl,yl,xr,yr,w,h;
562   Draw         draw;
563   PetscTruth   isnull;
564 
565   PetscFunctionBegin;
566 
567   ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
568   ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
569 
570   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
571   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
572   xr += w;    yr += h;  xl = -w;     yl = -h;
573   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
574   ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
575   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
576   PetscFunctionReturn(0);
577 }
578 
579 #undef __FUNC__
580 #define __FUNC__ "MatView_SeqBAIJ"
581 int MatView_SeqBAIJ(Mat A,Viewer viewer)
582 {
583   ViewerType  vtype;
584   int         ierr;
585 
586   PetscFunctionBegin;
587   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
588   if (PetscTypeCompare(vtype,SOCKET_VIEWER)) {
589     SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported");
590   } else if (PetscTypeCompare(vtype,ASCII_VIEWER)){
591     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
592   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
593     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
594   } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
595     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
596   } else {
597     SETERRQ(1,1,"Viewer type not supported by PETSc object");
598   }
599   PetscFunctionReturn(0);
600 }
601 
602 
603 #undef __FUNC__
604 #define __FUNC__ "MatGetValues_SeqBAIJ"
605 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
606 {
607   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
608   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
609   int        *ai = a->i, *ailen = a->ilen;
610   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
611   MatScalar  *ap, *aa = a->a, zero = 0.0;
612 
613   PetscFunctionBegin;
614   for ( k=0; k<m; k++ ) { /* loop over rows */
615     row  = im[k]; brow = row/bs;
616     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
617     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
618     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
619     nrow = ailen[brow];
620     for ( l=0; l<n; l++ ) { /* loop over columns */
621       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
622       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
623       col  = in[l] ;
624       bcol = col/bs;
625       cidx = col%bs;
626       ridx = row%bs;
627       high = nrow;
628       low  = 0; /* assume unsorted */
629       while (high-low > 5) {
630         t = (low+high)/2;
631         if (rp[t] > bcol) high = t;
632         else             low  = t;
633       }
634       for ( i=low; i<high; i++ ) {
635         if (rp[i] > bcol) break;
636         if (rp[i] == bcol) {
637           *v++ = ap[bs2*i+bs*cidx+ridx];
638           goto finished;
639         }
640       }
641       *v++ = zero;
642       finished:;
643     }
644   }
645   PetscFunctionReturn(0);
646 }
647 
648 
649 #undef __FUNC__
650 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
651 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
652 {
653   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
654   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
655   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented;
656   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
657   Scalar      *value = v;
658   MatScalar   *ap,*aa=a->a,*bap;
659 
660   PetscFunctionBegin;
661   if (roworiented) {
662     stepval = (n-1)*bs;
663   } else {
664     stepval = (m-1)*bs;
665   }
666   for ( k=0; k<m; k++ ) { /* loop over added rows */
667     row  = im[k];
668     if (row < 0) continue;
669 #if defined(PETSC_USE_BOPT_g)
670     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
671 #endif
672     rp   = aj + ai[row];
673     ap   = aa + bs2*ai[row];
674     rmax = imax[row];
675     nrow = ailen[row];
676     low  = 0;
677     for ( l=0; l<n; l++ ) { /* loop over added columns */
678       if (in[l] < 0) continue;
679 #if defined(PETSC_USE_BOPT_g)
680       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
681 #endif
682       col = in[l];
683       if (roworiented) {
684         value = v + k*(stepval+bs)*bs + l*bs;
685       } else {
686         value = v + l*(stepval+bs)*bs + k*bs;
687       }
688       if (!sorted) low = 0; high = nrow;
689       while (high-low > 7) {
690         t = (low+high)/2;
691         if (rp[t] > col) high = t;
692         else             low  = t;
693       }
694       for ( i=low; i<high; i++ ) {
695         if (rp[i] > col) break;
696         if (rp[i] == col) {
697           bap  = ap +  bs2*i;
698           if (roworiented) {
699             if (is == ADD_VALUES) {
700               for ( ii=0; ii<bs; ii++,value+=stepval ) {
701                 for (jj=ii; jj<bs2; jj+=bs ) {
702                   bap[jj] += *value++;
703                 }
704               }
705             } else {
706               for ( ii=0; ii<bs; ii++,value+=stepval ) {
707                 for (jj=ii; jj<bs2; jj+=bs ) {
708                   bap[jj] = *value++;
709                 }
710               }
711             }
712           } else {
713             if (is == ADD_VALUES) {
714               for ( ii=0; ii<bs; ii++,value+=stepval ) {
715                 for (jj=0; jj<bs; jj++ ) {
716                   *bap++ += *value++;
717                 }
718               }
719             } else {
720               for ( ii=0; ii<bs; ii++,value+=stepval ) {
721                 for (jj=0; jj<bs; jj++ ) {
722                   *bap++  = *value++;
723                 }
724               }
725             }
726           }
727           goto noinsert2;
728         }
729       }
730       if (nonew == 1) goto noinsert2;
731       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
732       if (nrow >= rmax) {
733         /* there is no extra room in row, therefore enlarge */
734         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
735         MatScalar *new_a;
736 
737         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
738 
739         /* malloc new storage space */
740         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
741         new_a   = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a);
742         new_j   = (int *) (new_a + bs2*new_nz);
743         new_i   = new_j + new_nz;
744 
745         /* copy over old data into new slots */
746         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
747         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
748         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
749         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
750         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
751         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
752         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
753         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
754         /* free up old matrix storage */
755         PetscFree(a->a);
756         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
757         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
758         a->singlemalloc = 1;
759 
760         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
761         rmax = imax[row] = imax[row] + CHUNKSIZE;
762         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
763         a->maxnz += bs2*CHUNKSIZE;
764         a->reallocs++;
765         a->nz++;
766       }
767       N = nrow++ - 1;
768       /* shift up all the later entries in this row */
769       for ( ii=N; ii>=i; ii-- ) {
770         rp[ii+1] = rp[ii];
771         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
772       }
773       if (N >= i) {
774         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
775       }
776       rp[i] = col;
777       bap   = ap +  bs2*i;
778       if (roworiented) {
779         for ( ii=0; ii<bs; ii++,value+=stepval) {
780           for (jj=ii; jj<bs2; jj+=bs ) {
781             bap[jj] = *value++;
782           }
783         }
784       } else {
785         for ( ii=0; ii<bs; ii++,value+=stepval ) {
786           for (jj=0; jj<bs; jj++ ) {
787             *bap++  = *value++;
788           }
789         }
790       }
791       noinsert2:;
792       low = i;
793     }
794     ailen[row] = nrow;
795   }
796   PetscFunctionReturn(0);
797 }
798 
799 
800 #undef __FUNC__
801 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
802 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
803 {
804   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
805   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
806   int        m = a->m,*ip, N, *ailen = a->ilen;
807   int        mbs = a->mbs, bs2 = a->bs2,rmax = 0,ierr;
808   MatScalar  *aa = a->a, *ap;
809 
810   PetscFunctionBegin;
811   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
812 
813   if (m) rmax = ailen[0];
814   for ( i=1; i<mbs; i++ ) {
815     /* move each row back by the amount of empty slots (fshift) before it*/
816     fshift += imax[i-1] - ailen[i-1];
817     rmax   = PetscMax(rmax,ailen[i]);
818     if (fshift) {
819       ip = aj + ai[i]; ap = aa + bs2*ai[i];
820       N = ailen[i];
821       for ( j=0; j<N; j++ ) {
822         ip[j-fshift] = ip[j];
823         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
824       }
825     }
826     ai[i] = ai[i-1] + ailen[i-1];
827   }
828   if (mbs) {
829     fshift += imax[mbs-1] - ailen[mbs-1];
830     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
831   }
832   /* reset ilen and imax for each row */
833   for ( i=0; i<mbs; i++ ) {
834     ailen[i] = imax[i] = ai[i+1] - ai[i];
835   }
836   a->nz = ai[mbs];
837 
838   /* diagonals may have moved, so kill the diagonal pointers */
839   if (fshift && a->diag) {
840     PetscFree(a->diag);
841     PLogObjectMemory(A,-(m+1)*sizeof(int));
842     a->diag = 0;
843   }
844   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
845            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
846   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
847            a->reallocs);
848   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
849   a->reallocs          = 0;
850   A->info.nz_unneeded  = (double)fshift*bs2;
851 
852   PetscFunctionReturn(0);
853 }
854 
855 
856 
857 /*
858    This function returns an array of flags which indicate the locations of contiguous
859    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
860    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
861    Assume: sizes should be long enough to hold all the values.
862 */
863 #undef __FUNC__
864 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks"
865 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
866 {
867   int i,j,k,row;
868   PetscTruth flg;
869 
870   /*   PetscFunctionBegin;*/
871   for ( i=0,j=0; i<n; j++ ) {
872     row = idx[i];
873     if (row%bs!=0) { /* Not the begining of a block */
874       sizes[j] = 1;
875       i++;
876     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
877       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
878       i++;
879     } else { /* Begining of the block, so check if the complete block exists */
880       flg = PETSC_TRUE;
881       for ( k=1; k<bs; k++ ) {
882         if (row+k != idx[i+k]) { /* break in the block */
883           flg = PETSC_FALSE;
884           break;
885         }
886       }
887       if (flg == PETSC_TRUE) { /* No break in the bs */
888         sizes[j] = bs;
889         i+= bs;
890       } else {
891         sizes[j] = 1;
892         i++;
893       }
894     }
895   }
896   *bs_max = j;
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNC__
901 #define __FUNC__ "MatZeroRows_SeqBAIJ"
902 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
903 {
904   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
905   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
906   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
907   Scalar      zero = 0.0;
908   MatScalar   *aa;
909 
910   PetscFunctionBegin;
911   /* Make a copy of the IS and  sort it */
912   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
913   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
914 
915   /* allocate memory for rows,sizes */
916   rows  = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows);
917   sizes = rows + is_n;
918 
919   /* initialize copy IS valurs to rows, and sort them */
920   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
921   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
922   ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
923   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
924 
925   for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) {
926     row   = rows[j];
927     if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row);
928     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
929     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
930     if (sizes[i] == bs) {
931       if (diag) {
932         if (baij->ilen[row/bs] > 0) {
933           baij->ilen[row/bs] = 1;
934           baij->j[baij->i[row/bs]] = row/bs;
935           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
936         }
937         /* Now insert all the diagoanl values for this bs */
938         for ( k=0; k<bs; k++ ) {
939           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
940         }
941       } else { /* (!diag) */
942         baij->ilen[row/bs] = 0;
943       } /* end (!diag) */
944     } else { /* (sizes[i] != bs) */
945 #if defined (PETSC_USE_DEBUG)
946       if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1");
947 #endif
948       for ( k=0; k<count; k++ ) {
949         aa[0] = zero;
950         aa+=bs;
951       }
952       if (diag) {
953         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
954       }
955     }
956   }
957 
958   PetscFree(rows);
959   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
960   PetscFunctionReturn(0);
961 }
962 
963 #undef __FUNC__
964 #define __FUNC__ "MatSetValues_SeqBAIJ"
965 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
966 {
967   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
968   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
969   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
970   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
971   int         ridx,cidx,bs2=a->bs2,ierr;
972   MatScalar   *ap,value,*aa=a->a,*bap;
973 
974   PetscFunctionBegin;
975   for ( k=0; k<m; k++ ) { /* loop over added rows */
976     row  = im[k]; brow = row/bs;
977     if (row < 0) continue;
978 #if defined(PETSC_USE_BOPT_g)
979     if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m);
980 #endif
981     rp   = aj + ai[brow];
982     ap   = aa + bs2*ai[brow];
983     rmax = imax[brow];
984     nrow = ailen[brow];
985     low  = 0;
986     for ( l=0; l<n; l++ ) { /* loop over added columns */
987       if (in[l] < 0) continue;
988 #if defined(PETSC_USE_BOPT_g)
989       if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n);
990 #endif
991       col = in[l]; bcol = col/bs;
992       ridx = row % bs; cidx = col % bs;
993       if (roworiented) {
994         value = v[l + k*n];
995       } else {
996         value = v[k + l*m];
997       }
998       if (!sorted) low = 0; high = nrow;
999       while (high-low > 7) {
1000         t = (low+high)/2;
1001         if (rp[t] > bcol) high = t;
1002         else              low  = t;
1003       }
1004       for ( i=low; i<high; i++ ) {
1005         if (rp[i] > bcol) break;
1006         if (rp[i] == bcol) {
1007           bap  = ap +  bs2*i + bs*cidx + ridx;
1008           if (is == ADD_VALUES) *bap += value;
1009           else                  *bap  = value;
1010           goto noinsert1;
1011         }
1012       }
1013       if (nonew == 1) goto noinsert1;
1014       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1015       if (nrow >= rmax) {
1016         /* there is no extra room in row, therefore enlarge */
1017         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1018         MatScalar *new_a;
1019 
1020         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1021 
1022         /* Malloc new storage space */
1023         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1024         new_a   = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a);
1025         new_j   = (int *) (new_a + bs2*new_nz);
1026         new_i   = new_j + new_nz;
1027 
1028         /* copy over old data into new slots */
1029         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
1030         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1031         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1032         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1033         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1034         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1035         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1036         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1037         /* free up old matrix storage */
1038         PetscFree(a->a);
1039         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
1040         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1041         a->singlemalloc = 1;
1042 
1043         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1044         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1045         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1046         a->maxnz += bs2*CHUNKSIZE;
1047         a->reallocs++;
1048         a->nz++;
1049       }
1050       N = nrow++ - 1;
1051       /* shift up all the later entries in this row */
1052       for ( ii=N; ii>=i; ii-- ) {
1053         rp[ii+1] = rp[ii];
1054         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1055       }
1056       if (N>=i) {
1057         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1058       }
1059       rp[i]                      = bcol;
1060       ap[bs2*i + bs*cidx + ridx] = value;
1061       noinsert1:;
1062       low = i;
1063     }
1064     ailen[brow] = nrow;
1065   }
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1070 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1071 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1072 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1073 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1074 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec);
1075 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1076 extern int MatScale_SeqBAIJ(Scalar*,Mat);
1077 extern int MatNorm_SeqBAIJ(Mat,NormType,double *);
1078 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*);
1079 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1080 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1081 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1082 extern int MatZeroEntries_SeqBAIJ(Mat);
1083 
1084 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1085 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1086 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1087 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1088 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1089 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1090 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
1091 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1092 
1093 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1094 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1095 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1096 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1097 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1098 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1099 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*);
1100 
1101 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1102 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1103 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1104 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1105 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1106 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec);
1107 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1108 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1109 
1110 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1111 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1112 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1113 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1114 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1115 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
1116 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1117 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1118 
1119 #undef __FUNC__
1120 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1121 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1122 {
1123   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1124   Mat         outA;
1125   int         ierr;
1126   PetscTruth  row_identity, col_identity;
1127 
1128   PetscFunctionBegin;
1129   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU");
1130   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1131   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1132   if (!row_identity || !col_identity) {
1133     SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU");
1134   }
1135 
1136   outA          = inA;
1137   inA->factor   = FACTOR_LU;
1138   a->row        = row;
1139   a->col        = col;
1140 
1141   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1142   ierr = ISInvertPermutation(col,&(a->icol));CHKERRQ(ierr);
1143   PLogObjectParent(inA,a->icol);
1144 
1145   if (!a->solve_work) {
1146     a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1147     PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1148   }
1149 
1150   if (!a->diag) {
1151     ierr = MatMarkDiag_SeqBAIJ(inA);CHKERRQ(ierr);
1152   }
1153   /*
1154       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1155       for ILU(0) factorization with natural ordering
1156   */
1157   switch (a->bs) {
1158     case 2:
1159     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1160     inA->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
1161     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1162     break;
1163   case 3:
1164     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1165     inA->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
1166     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1167     break;
1168   case 4:
1169     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1170     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1171     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1172     break;
1173   case 5:
1174     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1175     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1176     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1177     break;
1178   case 6:
1179     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1180     inA->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
1181     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1182     break;
1183   case 7:
1184     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1185     inA->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
1186     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1187     break;
1188   }
1189 
1190   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1191 
1192   PetscFunctionReturn(0);
1193 }
1194 #undef __FUNC__
1195 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1196 int MatPrintHelp_SeqBAIJ(Mat A)
1197 {
1198   static int called = 0;
1199   MPI_Comm   comm = A->comm;
1200   int        ierr;
1201 
1202   PetscFunctionBegin;
1203   if (called) {PetscFunctionReturn(0);} else called = 1;
1204   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1205   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 EXTERN_C_BEGIN
1210 #undef __FUNC__
1211 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1212 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1213 {
1214   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1215   int         i,nz,n;
1216 
1217   PetscFunctionBegin;
1218   nz = baij->maxnz;
1219   n  = baij->n;
1220   for (i=0; i<nz; i++) {
1221     baij->j[i] = indices[i];
1222   }
1223   baij->nz = nz;
1224   for ( i=0; i<n; i++ ) {
1225     baij->ilen[i] = baij->imax[i];
1226   }
1227 
1228   PetscFunctionReturn(0);
1229 }
1230 EXTERN_C_END
1231 
1232 #undef __FUNC__
1233 #define __FUNC__ "MatSeqBAIJSetColumnIndices"
1234 /*@
1235     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1236        in the matrix.
1237 
1238   Input Parameters:
1239 +  mat - the SeqBAIJ matrix
1240 -  indices - the column indices
1241 
1242   Level: advanced
1243 
1244   Notes:
1245     This can be called if you have precomputed the nonzero structure of the
1246   matrix and want to provide it to the matrix object to improve the performance
1247   of the MatSetValues() operation.
1248 
1249     You MUST have set the correct numbers of nonzeros per row in the call to
1250   MatCreateSeqBAIJ().
1251 
1252     MUST be called before any calls to MatSetValues();
1253 
1254 @*/
1255 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1256 {
1257   int ierr,(*f)(Mat,int *);
1258 
1259   PetscFunctionBegin;
1260   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1261   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr);
1262   if (f) {
1263     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1264   } else {
1265     SETERRQ(1,1,"Wrong type of matrix to set column indices");
1266   }
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 /* -------------------------------------------------------------------*/
1271 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1272        MatGetRow_SeqBAIJ,
1273        MatRestoreRow_SeqBAIJ,
1274        MatMult_SeqBAIJ_N,
1275        MatMultAdd_SeqBAIJ_N,
1276        MatMultTrans_SeqBAIJ,
1277        MatMultTransAdd_SeqBAIJ,
1278        MatSolve_SeqBAIJ_N,
1279        0,
1280        0,
1281        0,
1282        MatLUFactor_SeqBAIJ,
1283        0,
1284        0,
1285        MatTranspose_SeqBAIJ,
1286        MatGetInfo_SeqBAIJ,
1287        MatEqual_SeqBAIJ,
1288        MatGetDiagonal_SeqBAIJ,
1289        MatDiagonalScale_SeqBAIJ,
1290        MatNorm_SeqBAIJ,
1291        0,
1292        MatAssemblyEnd_SeqBAIJ,
1293        0,
1294        MatSetOption_SeqBAIJ,
1295        MatZeroEntries_SeqBAIJ,
1296        MatZeroRows_SeqBAIJ,
1297        MatLUFactorSymbolic_SeqBAIJ,
1298        MatLUFactorNumeric_SeqBAIJ_N,
1299        0,
1300        0,
1301        MatGetSize_SeqBAIJ,
1302        MatGetSize_SeqBAIJ,
1303        MatGetOwnershipRange_SeqBAIJ,
1304        MatILUFactorSymbolic_SeqBAIJ,
1305        0,
1306        0,
1307        0,
1308        MatDuplicate_SeqBAIJ,
1309        0,
1310        0,
1311        MatILUFactor_SeqBAIJ,
1312        0,
1313        0,
1314        MatGetSubMatrices_SeqBAIJ,
1315        MatIncreaseOverlap_SeqBAIJ,
1316        MatGetValues_SeqBAIJ,
1317        0,
1318        MatPrintHelp_SeqBAIJ,
1319        MatScale_SeqBAIJ,
1320        0,
1321        0,
1322        0,
1323        MatGetBlockSize_SeqBAIJ,
1324        MatGetRowIJ_SeqBAIJ,
1325        MatRestoreRowIJ_SeqBAIJ,
1326        0,
1327        0,
1328        0,
1329        0,
1330        0,
1331        0,
1332        MatSetValuesBlocked_SeqBAIJ,
1333        MatGetSubMatrix_SeqBAIJ,
1334        0,
1335        0,
1336        MatGetMaps_Petsc};
1337 
1338 EXTERN_C_BEGIN
1339 #undef __FUNC__
1340 #define __FUNC__ "MatStoreValues_SeqBAIJ"
1341 int MatStoreValues_SeqBAIJ(Mat mat)
1342 {
1343   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1344   int         nz = aij->i[aij->m]*aij->bs*aij->bs2;
1345   int         ierr;
1346 
1347   PetscFunctionBegin;
1348   if (aij->nonew != 1) {
1349     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1350   }
1351 
1352   /* allocate space for values if not already there */
1353   if (!aij->saved_values) {
1354     aij->saved_values = (Scalar *) PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values);
1355   }
1356 
1357   /* copy values over */
1358   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1359   PetscFunctionReturn(0);
1360 }
1361 EXTERN_C_END
1362 
1363 EXTERN_C_BEGIN
1364 #undef __FUNC__
1365 #define __FUNC__ "MatRetrieveValues_SeqBAIJ"
1366 int MatRetrieveValues_SeqBAIJ(Mat mat)
1367 {
1368   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1369   int         nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr;
1370 
1371   PetscFunctionBegin;
1372   if (aij->nonew != 1) {
1373     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1374   }
1375   if (!aij->saved_values) {
1376     SETERRQ(1,1,"Must call MatStoreValues(A);first");
1377   }
1378 
1379   /* copy values over */
1380   ierr = PetscMemcpy(aij->a, aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1381   PetscFunctionReturn(0);
1382 }
1383 EXTERN_C_END
1384 
1385 #undef __FUNC__
1386 #define __FUNC__ "MatCreateSeqBAIJ"
1387 /*@C
1388    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1389    compressed row) format.  For good matrix assembly performance the
1390    user should preallocate the matrix storage by setting the parameter nz
1391    (or the array nnz).  By setting these parameters accurately, performance
1392    during matrix assembly can be increased by more than a factor of 50.
1393 
1394    Collective on MPI_Comm
1395 
1396    Input Parameters:
1397 +  comm - MPI communicator, set to PETSC_COMM_SELF
1398 .  bs - size of block
1399 .  m - number of rows
1400 .  n - number of columns
1401 .  nz - number of block nonzeros per block row (same for all rows)
1402 -  nnz - array containing the number of block nonzeros in the various block rows
1403          (possibly different for each block row) or PETSC_NULL
1404 
1405    Output Parameter:
1406 .  A - the matrix
1407 
1408    Options Database Keys:
1409 .   -mat_no_unroll - uses code that does not unroll the loops in the
1410                      block calculations (much slower)
1411 .    -mat_block_size - size of the blocks to use
1412 
1413    Level: intermediate
1414 
1415    Notes:
1416    The block AIJ format is fully compatible with standard Fortran 77
1417    storage.  That is, the stored row and column indices can begin at
1418    either one (as in Fortran) or zero.  See the users' manual for details.
1419 
1420    Specify the preallocated storage with either nz or nnz (not both).
1421    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1422    allocation.  For additional details, see the users manual chapter on
1423    matrices.
1424 
1425 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1426 @*/
1427 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1428 {
1429   Mat         B;
1430   Mat_SeqBAIJ *b;
1431   int         i,len,ierr,flg,mbs,nbs,bs2,size;
1432 
1433   PetscFunctionBegin;
1434   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1435   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1");
1436 
1437   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1438   mbs  = m/bs;
1439   nbs  = n/bs;
1440   bs2  = bs*bs;
1441 
1442   if (mbs*bs!=m || nbs*bs!=n) {
1443     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize");
1444   }
1445 
1446   *A = 0;
1447   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView);
1448   PLogObjectCreate(B);
1449   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b);
1450   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1451   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1452   ierr    = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1453   if (!flg) {
1454     switch (bs) {
1455     case 1:
1456       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1457       B->ops->solve           = MatSolve_SeqBAIJ_1;
1458       B->ops->mult            = MatMult_SeqBAIJ_1;
1459       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1460       break;
1461     case 2:
1462       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1463       B->ops->solve           = MatSolve_SeqBAIJ_2;
1464       B->ops->mult            = MatMult_SeqBAIJ_2;
1465       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1466       break;
1467     case 3:
1468       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1469       B->ops->solve           = MatSolve_SeqBAIJ_3;
1470       B->ops->mult            = MatMult_SeqBAIJ_3;
1471       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1472       break;
1473     case 4:
1474       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1475       B->ops->solve           = MatSolve_SeqBAIJ_4;
1476       B->ops->mult            = MatMult_SeqBAIJ_4;
1477       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1478       break;
1479     case 5:
1480       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1481       B->ops->solve           = MatSolve_SeqBAIJ_5;
1482       B->ops->mult            = MatMult_SeqBAIJ_5;
1483       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1484       break;
1485     case 6:
1486       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1487       B->ops->solve           = MatSolve_SeqBAIJ_6;
1488       B->ops->mult            = MatMult_SeqBAIJ_6;
1489       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1490       break;
1491     case 7:
1492       B->ops->mult            = MatMult_SeqBAIJ_7;
1493       B->ops->solve           = MatSolve_SeqBAIJ_7;
1494       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1495       break;
1496     }
1497   }
1498   B->ops->destroy     = MatDestroy_SeqBAIJ;
1499   B->ops->view        = MatView_SeqBAIJ;
1500   B->factor           = 0;
1501   B->lupivotthreshold = 1.0;
1502   B->mapping          = 0;
1503   b->row              = 0;
1504   b->col              = 0;
1505   b->icol             = 0;
1506   b->reallocs         = 0;
1507   b->saved_values     = 0;
1508 
1509   b->m       = m; B->m = m; B->M = m;
1510   b->n       = n; B->n = n; B->N = n;
1511 
1512   ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr);
1513   ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr);
1514 
1515   b->mbs     = mbs;
1516   b->nbs     = nbs;
1517   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(b->imax);
1518   if (nnz == PETSC_NULL) {
1519     if (nz == PETSC_DEFAULT) nz = 5;
1520     else if (nz <= 0)        nz = 1;
1521     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1522     nz = nz*mbs;
1523   } else {
1524     nz = 0;
1525     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1526   }
1527 
1528   /* allocate the matrix space */
1529   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int);
1530   b->a  = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a);
1531   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1532   b->j  = (int *) (b->a + nz*bs2);
1533   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1534   b->i  = b->j + nz;
1535   b->singlemalloc = 1;
1536 
1537   b->i[0] = 0;
1538   for (i=1; i<mbs+1; i++) {
1539     b->i[i] = b->i[i-1] + b->imax[i-1];
1540   }
1541 
1542   /* b->ilen will count nonzeros in each block row so far. */
1543   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1544   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1545   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1546 
1547   b->bs               = bs;
1548   b->bs2              = bs2;
1549   b->mbs              = mbs;
1550   b->nz               = 0;
1551   b->maxnz            = nz*bs2;
1552   b->sorted           = 0;
1553   b->roworiented      = 1;
1554   b->nonew            = 0;
1555   b->diag             = 0;
1556   b->solve_work       = 0;
1557   b->mult_work        = 0;
1558   b->spptr            = 0;
1559   B->info.nz_unneeded = (double)b->maxnz;
1560 
1561   *A = B;
1562   ierr = OptionsHasName(PETSC_NULL,"-help", &flg);CHKERRQ(ierr);
1563   if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); }
1564 
1565   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
1566                                      "MatStoreValues_SeqBAIJ",
1567                                      (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1568   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
1569                                      "MatRetrieveValues_SeqBAIJ",
1570                                      (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1571   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1572                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1573                                      (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1574   PetscFunctionReturn(0);
1575 }
1576 
1577 #undef __FUNC__
1578 #define __FUNC__ "MatDuplicate_SeqBAIJ"
1579 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1580 {
1581   Mat         C;
1582   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1583   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1584 
1585   PetscFunctionBegin;
1586   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix");
1587 
1588   *B = 0;
1589   PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView);
1590   PLogObjectCreate(C);
1591   C->data         = (void *) (c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c);
1592   ierr            = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1593   C->ops->destroy = MatDestroy_SeqBAIJ;
1594   C->ops->view    = MatView_SeqBAIJ;
1595   C->factor       = A->factor;
1596   c->row          = 0;
1597   c->col          = 0;
1598   c->icol         = 0;
1599   c->saved_values = 0;
1600   C->assembled    = PETSC_TRUE;
1601 
1602   c->m = C->m   = a->m;
1603   c->n = C->n   = a->n;
1604   C->M          = a->m;
1605   C->N          = a->n;
1606 
1607   c->bs         = a->bs;
1608   c->bs2        = a->bs2;
1609   c->mbs        = a->mbs;
1610   c->nbs        = a->nbs;
1611 
1612   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax);
1613   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen);
1614   for ( i=0; i<mbs; i++ ) {
1615     c->imax[i] = a->imax[i];
1616     c->ilen[i] = a->ilen[i];
1617   }
1618 
1619   /* allocate the matrix space */
1620   c->singlemalloc = 1;
1621   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1622   c->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(c->a);
1623   c->j = (int *) (c->a + nz*bs2);
1624   c->i = c->j + nz;
1625   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1626   if (mbs > 0) {
1627     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1628     if (cpvalues == MAT_COPY_VALUES) {
1629       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1630     } else {
1631       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1632     }
1633   }
1634 
1635   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1636   c->sorted      = a->sorted;
1637   c->roworiented = a->roworiented;
1638   c->nonew       = a->nonew;
1639 
1640   if (a->diag) {
1641     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(c->diag);
1642     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1643     for ( i=0; i<mbs; i++ ) {
1644       c->diag[i] = a->diag[i];
1645     }
1646   } else c->diag        = 0;
1647   c->nz                 = a->nz;
1648   c->maxnz              = a->maxnz;
1649   c->solve_work         = 0;
1650   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1651   c->mult_work          = 0;
1652   *B = C;
1653   ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1654   PetscFunctionReturn(0);
1655 }
1656 
1657 #undef __FUNC__
1658 #define __FUNC__ "MatLoad_SeqBAIJ"
1659 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1660 {
1661   Mat_SeqBAIJ  *a;
1662   Mat          B;
1663   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1664   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1665   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1666   int          *masked, nmask,tmp,bs2,ishift;
1667   Scalar       *aa;
1668   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1669 
1670   PetscFunctionBegin;
1671   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1672   bs2  = bs*bs;
1673 
1674   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1675   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor");
1676   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1677   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1678   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object");
1679   M = header[1]; N = header[2]; nz = header[3];
1680 
1681   if (header[3] < 0) {
1682     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ");
1683   }
1684 
1685   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
1686 
1687   /*
1688      This code adds extra rows to make sure the number of rows is
1689     divisible by the blocksize
1690   */
1691   mbs        = M/bs;
1692   extra_rows = bs - M + bs*(mbs);
1693   if (extra_rows == bs) extra_rows = 0;
1694   else                  mbs++;
1695   if (extra_rows) {
1696     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1697   }
1698 
1699   /* read in row lengths */
1700   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1701   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1702   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1703 
1704   /* read in column indices */
1705   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) );CHKPTRQ(jj);
1706   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1707   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1708 
1709   /* loop over row lengths determining block row lengths */
1710   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1711   ierr        = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1712   mask        = (int *) PetscMalloc( 2*mbs*sizeof(int) );CHKPTRQ(mask);
1713   ierr        = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1714   masked      = mask + mbs;
1715   rowcount    = 0; nzcount = 0;
1716   for ( i=0; i<mbs; i++ ) {
1717     nmask = 0;
1718     for ( j=0; j<bs; j++ ) {
1719       kmax = rowlengths[rowcount];
1720       for ( k=0; k<kmax; k++ ) {
1721         tmp = jj[nzcount++]/bs;
1722         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1723       }
1724       rowcount++;
1725     }
1726     browlengths[i] += nmask;
1727     /* zero out the mask elements we set */
1728     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1729   }
1730 
1731   /* create our matrix */
1732   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1733   B = *A;
1734   a = (Mat_SeqBAIJ *) B->data;
1735 
1736   /* set matrix "i" values */
1737   a->i[0] = 0;
1738   for ( i=1; i<= mbs; i++ ) {
1739     a->i[i]      = a->i[i-1] + browlengths[i-1];
1740     a->ilen[i-1] = browlengths[i-1];
1741   }
1742   a->nz         = 0;
1743   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1744 
1745   /* read in nonzero values */
1746   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1747   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1748   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1749 
1750   /* set "a" and "j" values into matrix */
1751   nzcount = 0; jcount = 0;
1752   for ( i=0; i<mbs; i++ ) {
1753     nzcountb = nzcount;
1754     nmask    = 0;
1755     for ( j=0; j<bs; j++ ) {
1756       kmax = rowlengths[i*bs+j];
1757       for ( k=0; k<kmax; k++ ) {
1758         tmp = jj[nzcount++]/bs;
1759 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1760       }
1761       rowcount++;
1762     }
1763     /* sort the masked values */
1764     PetscSortInt(nmask,masked);
1765 
1766     /* set "j" values into matrix */
1767     maskcount = 1;
1768     for ( j=0; j<nmask; j++ ) {
1769       a->j[jcount++]  = masked[j];
1770       mask[masked[j]] = maskcount++;
1771     }
1772     /* set "a" values into matrix */
1773     ishift = bs2*a->i[i];
1774     for ( j=0; j<bs; j++ ) {
1775       kmax = rowlengths[i*bs+j];
1776       for ( k=0; k<kmax; k++ ) {
1777         tmp       = jj[nzcountb]/bs ;
1778         block     = mask[tmp] - 1;
1779         point     = jj[nzcountb] - bs*tmp;
1780         idx       = ishift + bs2*block + j + bs*point;
1781         a->a[idx] = aa[nzcountb++];
1782       }
1783     }
1784     /* zero out the mask elements we set */
1785     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1786   }
1787   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix");
1788 
1789   PetscFree(rowlengths);
1790   PetscFree(browlengths);
1791   PetscFree(aa);
1792   PetscFree(jj);
1793   PetscFree(mask);
1794 
1795   B->assembled = PETSC_TRUE;
1796 
1797   ierr = MatView_Private(B);CHKERRQ(ierr);
1798   PetscFunctionReturn(0);
1799 }
1800 
1801 
1802 
1803