xref: /petsc/src/mat/impls/baij/seq/baij.c (revision fd16b1779ca341c090fa91c269e2c2dd5dd0b73d)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: baij.c,v 1.182 1999/09/15 16:26:29 bsmith Exp bsmith $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "sys.h"
10 #include "src/mat/impls/baij/seq/baij.h"
11 #include "src/vec/vecimpl.h"
12 #include "src/inline/spops.h"
13 
14 #define CHUNKSIZE  10
15 
16 /*
17      Checks for missing diagonals
18 */
19 #undef __FUNC__
20 #define __FUNC__ "MatMissingDiag_SeqBAIJ"
21 int MatMissingDiag_SeqBAIJ(Mat A)
22 {
23   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
24   int         *diag = a->diag, *jj = a->j,i;
25 
26   PetscFunctionBegin;
27   for ( i=0; i<a->mbs; i++ ) {
28     if (jj[diag[i]] != i) {
29       SETERRQ1(1,1,"Matrix is missing diagonal number %d",i);
30     }
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 #undef __FUNC__
36 #define __FUNC__ "MatMarkDiag_SeqBAIJ"
37 int MatMarkDiag_SeqBAIJ(Mat A)
38 {
39   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
40   int         i,j, *diag, m = a->mbs;
41 
42   PetscFunctionBegin;
43   diag = (int *) PetscMalloc( (m+1)*sizeof(int));CHKPTRQ(diag);
44   PLogObjectMemory(A,(m+1)*sizeof(int));
45   for ( i=0; i<m; i++ ) {
46     diag[i] = a->i[i+1];
47     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
48       if (a->j[j] == i) {
49         diag[i] = j;
50         break;
51       }
52     }
53   }
54   a->diag = diag;
55   PetscFunctionReturn(0);
56 }
57 
58 
59 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
60 
61 #undef __FUNC__
62 #define __FUNC__ "MatGetRowIJ_SeqBAIJ"
63 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
64                             PetscTruth *done)
65 {
66   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
67   int         ierr, n = a->mbs,i;
68 
69   PetscFunctionBegin;
70   *nn = n;
71   if (!ia) PetscFunctionReturn(0);
72   if (symmetric) {
73     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
74   } else if (oshift == 1) {
75     /* temporarily add 1 to i and j indices */
76     int nz = a->i[n] + 1;
77     for ( i=0; i<nz; i++ ) a->j[i]++;
78     for ( i=0; i<n+1; i++ ) a->i[i]++;
79     *ia = a->i; *ja = a->j;
80   } else {
81     *ia = a->i; *ja = a->j;
82   }
83 
84   PetscFunctionReturn(0);
85 }
86 
87 #undef __FUNC__
88 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ"
89 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
90                                 PetscTruth *done)
91 {
92   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
93   int         i,n = a->mbs,ierr;
94 
95   PetscFunctionBegin;
96   if (!ia) PetscFunctionReturn(0);
97   if (symmetric) {
98     ierr = PetscFree(*ia);CHKERRQ(ierr);
99     ierr = PetscFree(*ja);CHKERRQ(ierr);
100   } else if (oshift == 1) {
101     int nz = a->i[n];
102     for ( i=0; i<nz; i++ ) a->j[i]--;
103     for ( i=0; i<n+1; i++ ) a->i[i]--;
104   }
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNC__
109 #define __FUNC__ "MatGetBlockSize_SeqBAIJ"
110 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
111 {
112   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
113 
114   PetscFunctionBegin;
115   *bs = baij->bs;
116   PetscFunctionReturn(0);
117 }
118 
119 
120 #undef __FUNC__
121 #define __FUNC__ "MatDestroy_SeqBAIJ"
122 int MatDestroy_SeqBAIJ(Mat A)
123 {
124   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
125   int         ierr;
126 
127   PetscFunctionBegin;
128   if (A->mapping) {
129     ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr);
130   }
131   if (A->bmapping) {
132     ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr);
133   }
134   if (A->rmap) {
135     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
136   }
137   if (A->cmap) {
138     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
139   }
140 #if defined(PETSC_USE_LOG)
141   PLogObjectState((PetscObject) A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
142 #endif
143   ierr = PetscFree(a->a);CHKERRQ(ierr);
144   if (!a->singlemalloc) {
145     ierr = PetscFree(a->i);CHKERRQ(ierr);
146     ierr = PetscFree(a->j);CHKERRQ(ierr);
147   }
148   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
149   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
150   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
151   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
152   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
153   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
154   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
155   ierr = PetscFree(a);CHKERRQ(ierr);
156   PLogObjectDestroy(A);
157   PetscHeaderDestroy(A);
158   PetscFunctionReturn(0);
159 }
160 
161 #undef __FUNC__
162 #define __FUNC__ "MatSetOption_SeqBAIJ"
163 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
164 {
165   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
166 
167   PetscFunctionBegin;
168   if      (op == MAT_ROW_ORIENTED)                 a->roworiented = 1;
169   else if (op == MAT_COLUMN_ORIENTED)              a->roworiented = 0;
170   else if (op == MAT_COLUMNS_SORTED)               a->sorted      = 1;
171   else if (op == MAT_COLUMNS_UNSORTED)             a->sorted      = 0;
172   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)     a->nonew       = 1;
173   else if (op == MAT_NEW_NONZERO_LOCATION_ERR)     a->nonew       = -1;
174   else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR)   a->nonew       = -2;
175   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS)    a->nonew       = 0;
176   else if (op == MAT_ROWS_SORTED ||
177            op == MAT_ROWS_UNSORTED ||
178            op == MAT_SYMMETRIC ||
179            op == MAT_STRUCTURALLY_SYMMETRIC ||
180            op == MAT_YES_NEW_DIAGONALS ||
181            op == MAT_IGNORE_OFF_PROC_ENTRIES ||
182            op == MAT_USE_HASH_TABLE) {
183     PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
184   } else if (op == MAT_NO_NEW_DIAGONALS) {
185     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
186   } else {
187     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
188   }
189   PetscFunctionReturn(0);
190 }
191 
192 
193 #undef __FUNC__
194 #define __FUNC__ "MatGetSize_SeqBAIJ"
195 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
196 {
197   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
198 
199   PetscFunctionBegin;
200   if (m) *m = a->m;
201   if (n) *n = a->n;
202   PetscFunctionReturn(0);
203 }
204 
205 #undef __FUNC__
206 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ"
207 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
208 {
209   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
210 
211   PetscFunctionBegin;
212   *m = 0; *n = a->m;
213   PetscFunctionReturn(0);
214 }
215 
216 #undef __FUNC__
217 #define __FUNC__ "MatGetRow_SeqBAIJ"
218 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
219 {
220   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ *) A->data;
221   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
222   MatScalar    *aa,*aa_i;
223   Scalar       *v_i;
224 
225   PetscFunctionBegin;
226   bs  = a->bs;
227   ai  = a->i;
228   aj  = a->j;
229   aa  = a->a;
230   bs2 = a->bs2;
231 
232   if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range");
233 
234   bn  = row/bs;   /* Block number */
235   bp  = row % bs; /* Block Position */
236   M   = ai[bn+1] - ai[bn];
237   *nz = bs*M;
238 
239   if (v) {
240     *v = 0;
241     if (*nz) {
242       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) );CHKPTRQ(*v);
243       for ( i=0; i<M; i++ ) { /* for each block in the block row */
244         v_i  = *v + i*bs;
245         aa_i = aa + bs2*(ai[bn] + i);
246         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
247       }
248     }
249   }
250 
251   if (idx) {
252     *idx = 0;
253     if (*nz) {
254       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) );CHKPTRQ(*idx);
255       for ( i=0; i<M; i++ ) { /* for each block in the block row */
256         idx_i = *idx + i*bs;
257         itmp  = bs*aj[ai[bn] + i];
258         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
259       }
260     }
261   }
262   PetscFunctionReturn(0);
263 }
264 
265 #undef __FUNC__
266 #define __FUNC__ "MatRestoreRow_SeqBAIJ"
267 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
268 {
269   int ierr;
270 
271   PetscFunctionBegin;
272   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
273   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNC__
278 #define __FUNC__ "MatTranspose_SeqBAIJ"
279 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
280 {
281   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
282   Mat         C;
283   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
284   int         *rows,*cols,bs2=a->bs2;
285   MatScalar   *array = a->a;
286 
287   PetscFunctionBegin;
288   if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place");
289   col  = (int *) PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col);
290   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
291 
292   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
293   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
294   ierr = PetscFree(col);CHKERRQ(ierr);
295   rows = (int *) PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows);
296   cols = rows + bs;
297   for ( i=0; i<mbs; i++ ) {
298     cols[0] = i*bs;
299     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
300     len = ai[i+1] - ai[i];
301     for ( j=0; j<len; j++ ) {
302       rows[0] = (*aj++)*bs;
303       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
304       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
305       array += bs2;
306     }
307   }
308   ierr = PetscFree(rows);CHKERRQ(ierr);
309 
310   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
311   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
312 
313   if (B != PETSC_NULL) {
314     *B = C;
315   } else {
316     PetscOps *Abops;
317     MatOps   Aops;
318 
319     /* This isn't really an in-place transpose */
320     ierr = PetscFree(a->a);CHKERRQ(ierr);
321     if (!a->singlemalloc) {
322       ierr = PetscFree(a->i);CHKERRQ(ierr);
323       ierr = PetscFree(a->j);CHKERRQ(ierr);
324     }
325     if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
326     if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
327     if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
328     if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
329     ierr = PetscFree(a);CHKERRQ(ierr);
330 
331 
332     ierr = MapDestroy(A->rmap);CHKERRQ(ierr);
333     ierr = MapDestroy(A->cmap);CHKERRQ(ierr);
334 
335     /*
336        This is horrible, horrible code. We need to keep the
337       A pointers for the bops and ops but copy everything
338       else from C.
339     */
340     Abops    = A->bops;
341     Aops     = A->ops;
342     ierr     = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr);
343     A->bops  = Abops;
344     A->ops   = Aops;
345     A->qlist = 0;
346 
347     PetscHeaderDestroy(C);
348   }
349   PetscFunctionReturn(0);
350 }
351 
352 #undef __FUNC__
353 #define __FUNC__ "MatView_SeqBAIJ_Binary"
354 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
355 {
356   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
357   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
358   Scalar      *aa;
359   FILE        *file;
360 
361   PetscFunctionBegin;
362   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
363   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
364   col_lens[0] = MAT_COOKIE;
365 
366   col_lens[1] = a->m;
367   col_lens[2] = a->n;
368   col_lens[3] = a->nz*bs2;
369 
370   /* store lengths of each row and write (including header) to file */
371   count = 0;
372   for ( i=0; i<a->mbs; i++ ) {
373     for ( j=0; j<bs; j++ ) {
374       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
375     }
376   }
377   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr);
378   ierr = PetscFree(col_lens);CHKERRQ(ierr);
379 
380   /* store column indices (zero start index) */
381   jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) );CHKPTRQ(jj);
382   count = 0;
383   for ( i=0; i<a->mbs; i++ ) {
384     for ( j=0; j<bs; j++ ) {
385       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
386         for ( l=0; l<bs; l++ ) {
387           jj[count++] = bs*a->j[k] + l;
388         }
389       }
390     }
391   }
392   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
393   ierr = PetscFree(jj);CHKERRQ(ierr);
394 
395   /* store nonzero values */
396   aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa);
397   count = 0;
398   for ( i=0; i<a->mbs; i++ ) {
399     for ( j=0; j<bs; j++ ) {
400       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
401         for ( l=0; l<bs; l++ ) {
402           aa[count++] = a->a[bs2*k + l*bs + j];
403         }
404       }
405     }
406   }
407   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
408   ierr = PetscFree(aa);CHKERRQ(ierr);
409 
410   ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
411   if (file) {
412     fprintf(file,"-matload_block_size %d\n",a->bs);
413   }
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNC__
418 #define __FUNC__ "MatView_SeqBAIJ_ASCII"
419 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
420 {
421   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
422   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
423   FILE        *fd;
424   char        *outputname;
425 
426   PetscFunctionBegin;
427   ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
428   ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr);
429   ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
430   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
431     ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
432   } else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
433     SETERRQ(PETSC_ERR_SUP,0,"Socket format not supported");
434   } else if (format == VIEWER_FORMAT_ASCII_COMMON) {
435     for ( i=0; i<a->mbs; i++ ) {
436       for ( j=0; j<bs; j++ ) {
437         fprintf(fd,"row %d:",i*bs+j);
438         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
439           for ( l=0; l<bs; l++ ) {
440 #if defined(PETSC_USE_COMPLEX)
441             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
442               fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
443                       PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));
444             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0 && PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
445               fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
446                       PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));
447             } else if (PetscReal(a->a[bs2*k + l*bs + j]) != 0.0) {
448               fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));
449             }
450 #else
451             if (a->a[bs2*k + l*bs + j] != 0.0) {
452               fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
453             }
454 #endif
455           }
456         }
457         fprintf(fd,"\n");
458       }
459     }
460   } else {
461     for ( i=0; i<a->mbs; i++ ) {
462       for ( j=0; j<bs; j++ ) {
463         fprintf(fd,"row %d:",i*bs+j);
464         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
465           for ( l=0; l<bs; l++ ) {
466 #if defined(PETSC_USE_COMPLEX)
467             if (PetscImaginary(a->a[bs2*k + l*bs + j]) > 0.0) {
468               fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
469                 PetscReal(a->a[bs2*k + l*bs + j]),PetscImaginary(a->a[bs2*k + l*bs + j]));
470             } else if (PetscImaginary(a->a[bs2*k + l*bs + j]) < 0.0) {
471               fprintf(fd," %d %g - %g i",bs*a->j[k]+l,
472                 PetscReal(a->a[bs2*k + l*bs + j]),-PetscImaginary(a->a[bs2*k + l*bs + j]));
473             } else {
474               fprintf(fd," %d %g ",bs*a->j[k]+l,PetscReal(a->a[bs2*k + l*bs + j]));
475             }
476 #else
477             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
478 #endif
479           }
480         }
481         fprintf(fd,"\n");
482       }
483     }
484   }
485   fflush(fd);
486   PetscFunctionReturn(0);
487 }
488 
489 #undef __FUNC__
490 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom"
491 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa)
492 {
493   Mat          A = (Mat) Aa;
494   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
495   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank;
496   double       xl,yl,xr,yr,x_l,x_r,y_l,y_r;
497   MatScalar    *aa;
498   MPI_Comm     comm;
499   Viewer       viewer;
500 
501   PetscFunctionBegin;
502   /*
503       This is nasty. If this is called from an originally parallel matrix
504    then all processes call this, but only the first has the matrix so the
505    rest should return immediately.
506   */
507   ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr);
508   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
509   if (rank) PetscFunctionReturn(0);
510 
511   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*) &viewer);CHKERRQ(ierr);
512 
513   ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
514 
515   /* loop over matrix elements drawing boxes */
516   color = DRAW_BLUE;
517   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
518     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
519       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
520       x_l = a->j[j]*bs; x_r = x_l + 1.0;
521       aa = a->a + j*bs2;
522       for ( k=0; k<bs; k++ ) {
523         for ( l=0; l<bs; l++ ) {
524           if (PetscReal(*aa++) >=  0.) continue;
525           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
526         }
527       }
528     }
529   }
530   color = DRAW_CYAN;
531   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
532     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
533       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
534       x_l = a->j[j]*bs; x_r = x_l + 1.0;
535       aa = a->a + j*bs2;
536       for ( k=0; k<bs; k++ ) {
537         for ( l=0; l<bs; l++ ) {
538           if (PetscReal(*aa++) != 0.) continue;
539           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
540         }
541       }
542     }
543   }
544 
545   color = DRAW_RED;
546   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
547     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
548       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
549       x_l = a->j[j]*bs; x_r = x_l + 1.0;
550       aa = a->a + j*bs2;
551       for ( k=0; k<bs; k++ ) {
552         for ( l=0; l<bs; l++ ) {
553           if (PetscReal(*aa++) <= 0.) continue;
554           ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
555         }
556       }
557     }
558   }
559   PetscFunctionReturn(0);
560 }
561 
562 #undef __FUNC__
563 #define __FUNC__ "MatView_SeqBAIJ_Draw"
564 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
565 {
566   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
567   int          ierr;
568   double       xl,yl,xr,yr,w,h;
569   Draw         draw;
570   PetscTruth   isnull;
571 
572   PetscFunctionBegin;
573 
574   ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
575   ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
576 
577   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
578   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
579   xr += w;    yr += h;  xl = -w;     yl = -h;
580   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
581   ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
582   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
583   PetscFunctionReturn(0);
584 }
585 
586 #undef __FUNC__
587 #define __FUNC__ "MatView_SeqBAIJ"
588 int MatView_SeqBAIJ(Mat A,Viewer viewer)
589 {
590   ViewerType  vtype;
591   int         ierr;
592 
593   PetscFunctionBegin;
594   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
595   if (PetscTypeCompare(vtype,SOCKET_VIEWER)) {
596     SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported");
597   } else if (PetscTypeCompare(vtype,ASCII_VIEWER)){
598     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
599   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
600     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
601   } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
602     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
603   } else {
604     SETERRQ(1,1,"Viewer type not supported by PETSc object");
605   }
606   PetscFunctionReturn(0);
607 }
608 
609 
610 #undef __FUNC__
611 #define __FUNC__ "MatGetValues_SeqBAIJ"
612 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
613 {
614   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
615   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
616   int        *ai = a->i, *ailen = a->ilen;
617   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
618   MatScalar  *ap, *aa = a->a, zero = 0.0;
619 
620   PetscFunctionBegin;
621   for ( k=0; k<m; k++ ) { /* loop over rows */
622     row  = im[k]; brow = row/bs;
623     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
624     if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
625     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
626     nrow = ailen[brow];
627     for ( l=0; l<n; l++ ) { /* loop over columns */
628       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
629       if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
630       col  = in[l] ;
631       bcol = col/bs;
632       cidx = col%bs;
633       ridx = row%bs;
634       high = nrow;
635       low  = 0; /* assume unsorted */
636       while (high-low > 5) {
637         t = (low+high)/2;
638         if (rp[t] > bcol) high = t;
639         else             low  = t;
640       }
641       for ( i=low; i<high; i++ ) {
642         if (rp[i] > bcol) break;
643         if (rp[i] == bcol) {
644           *v++ = ap[bs2*i+bs*cidx+ridx];
645           goto finished;
646         }
647       }
648       *v++ = zero;
649       finished:;
650     }
651   }
652   PetscFunctionReturn(0);
653 }
654 
655 
656 #undef __FUNC__
657 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ"
658 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
659 {
660   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
661   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
662   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented;
663   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
664   Scalar      *value = v;
665   MatScalar   *ap,*aa=a->a,*bap;
666 
667   PetscFunctionBegin;
668   if (roworiented) {
669     stepval = (n-1)*bs;
670   } else {
671     stepval = (m-1)*bs;
672   }
673   for ( k=0; k<m; k++ ) { /* loop over added rows */
674     row  = im[k];
675     if (row < 0) continue;
676 #if defined(PETSC_USE_BOPT_g)
677     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
678 #endif
679     rp   = aj + ai[row];
680     ap   = aa + bs2*ai[row];
681     rmax = imax[row];
682     nrow = ailen[row];
683     low  = 0;
684     for ( l=0; l<n; l++ ) { /* loop over added columns */
685       if (in[l] < 0) continue;
686 #if defined(PETSC_USE_BOPT_g)
687       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
688 #endif
689       col = in[l];
690       if (roworiented) {
691         value = v + k*(stepval+bs)*bs + l*bs;
692       } else {
693         value = v + l*(stepval+bs)*bs + k*bs;
694       }
695       if (!sorted) low = 0; high = nrow;
696       while (high-low > 7) {
697         t = (low+high)/2;
698         if (rp[t] > col) high = t;
699         else             low  = t;
700       }
701       for ( i=low; i<high; i++ ) {
702         if (rp[i] > col) break;
703         if (rp[i] == col) {
704           bap  = ap +  bs2*i;
705           if (roworiented) {
706             if (is == ADD_VALUES) {
707               for ( ii=0; ii<bs; ii++,value+=stepval ) {
708                 for (jj=ii; jj<bs2; jj+=bs ) {
709                   bap[jj] += *value++;
710                 }
711               }
712             } else {
713               for ( ii=0; ii<bs; ii++,value+=stepval ) {
714                 for (jj=ii; jj<bs2; jj+=bs ) {
715                   bap[jj] = *value++;
716                 }
717               }
718             }
719           } else {
720             if (is == ADD_VALUES) {
721               for ( ii=0; ii<bs; ii++,value+=stepval ) {
722                 for (jj=0; jj<bs; jj++ ) {
723                   *bap++ += *value++;
724                 }
725               }
726             } else {
727               for ( ii=0; ii<bs; ii++,value+=stepval ) {
728                 for (jj=0; jj<bs; jj++ ) {
729                   *bap++  = *value++;
730                 }
731               }
732             }
733           }
734           goto noinsert2;
735         }
736       }
737       if (nonew == 1) goto noinsert2;
738       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
739       if (nrow >= rmax) {
740         /* there is no extra room in row, therefore enlarge */
741         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
742         MatScalar *new_a;
743 
744         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
745 
746         /* malloc new storage space */
747         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
748         new_a   = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a);
749         new_j   = (int *) (new_a + bs2*new_nz);
750         new_i   = new_j + new_nz;
751 
752         /* copy over old data into new slots */
753         for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];}
754         for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
755         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
756         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
757         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
758         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
759         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
760         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
761         /* free up old matrix storage */
762         ierr = PetscFree(a->a);CHKERRQ(ierr);
763         if (!a->singlemalloc) {
764           ierr = PetscFree(a->i);CHKERRQ(ierr);
765           ierr = PetscFree(a->j);CHKERRQ(ierr);
766         }
767         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
768         a->singlemalloc = 1;
769 
770         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
771         rmax = imax[row] = imax[row] + CHUNKSIZE;
772         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
773         a->maxnz += bs2*CHUNKSIZE;
774         a->reallocs++;
775         a->nz++;
776       }
777       N = nrow++ - 1;
778       /* shift up all the later entries in this row */
779       for ( ii=N; ii>=i; ii-- ) {
780         rp[ii+1] = rp[ii];
781         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
782       }
783       if (N >= i) {
784         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
785       }
786       rp[i] = col;
787       bap   = ap +  bs2*i;
788       if (roworiented) {
789         for ( ii=0; ii<bs; ii++,value+=stepval) {
790           for (jj=ii; jj<bs2; jj+=bs ) {
791             bap[jj] = *value++;
792           }
793         }
794       } else {
795         for ( ii=0; ii<bs; ii++,value+=stepval ) {
796           for (jj=0; jj<bs; jj++ ) {
797             *bap++  = *value++;
798           }
799         }
800       }
801       noinsert2:;
802       low = i;
803     }
804     ailen[row] = nrow;
805   }
806   PetscFunctionReturn(0);
807 }
808 
809 
810 #undef __FUNC__
811 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ"
812 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
813 {
814   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
815   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
816   int        m = a->m,*ip, N, *ailen = a->ilen;
817   int        mbs = a->mbs, bs2 = a->bs2,rmax = 0,ierr;
818   MatScalar  *aa = a->a, *ap;
819 
820   PetscFunctionBegin;
821   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
822 
823   if (m) rmax = ailen[0];
824   for ( i=1; i<mbs; i++ ) {
825     /* move each row back by the amount of empty slots (fshift) before it*/
826     fshift += imax[i-1] - ailen[i-1];
827     rmax   = PetscMax(rmax,ailen[i]);
828     if (fshift) {
829       ip = aj + ai[i]; ap = aa + bs2*ai[i];
830       N = ailen[i];
831       for ( j=0; j<N; j++ ) {
832         ip[j-fshift] = ip[j];
833         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
834       }
835     }
836     ai[i] = ai[i-1] + ailen[i-1];
837   }
838   if (mbs) {
839     fshift += imax[mbs-1] - ailen[mbs-1];
840     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
841   }
842   /* reset ilen and imax for each row */
843   for ( i=0; i<mbs; i++ ) {
844     ailen[i] = imax[i] = ai[i+1] - ai[i];
845   }
846   a->nz = ai[mbs];
847 
848   /* diagonals may have moved, so kill the diagonal pointers */
849   if (fshift && a->diag) {
850     ierr = PetscFree(a->diag);CHKERRQ(ierr);
851     PLogObjectMemory(A,-(m+1)*sizeof(int));
852     a->diag = 0;
853   }
854   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
855            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
856   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
857            a->reallocs);
858   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
859   a->reallocs          = 0;
860   A->info.nz_unneeded  = (double)fshift*bs2;
861 
862   PetscFunctionReturn(0);
863 }
864 
865 
866 
867 /*
868    This function returns an array of flags which indicate the locations of contiguous
869    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
870    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
871    Assume: sizes should be long enough to hold all the values.
872 */
873 #undef __FUNC__
874 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks"
875 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
876 {
877   int        i,j,k,row;
878   PetscTruth flg;
879 
880   PetscFunctionBegin;
881   for ( i=0,j=0; i<n; j++ ) {
882     row = idx[i];
883     if (row%bs!=0) { /* Not the begining of a block */
884       sizes[j] = 1;
885       i++;
886     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
887       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
888       i++;
889     } else { /* Begining of the block, so check if the complete block exists */
890       flg = PETSC_TRUE;
891       for ( k=1; k<bs; k++ ) {
892         if (row+k != idx[i+k]) { /* break in the block */
893           flg = PETSC_FALSE;
894           break;
895         }
896       }
897       if (flg == PETSC_TRUE) { /* No break in the bs */
898         sizes[j] = bs;
899         i+= bs;
900       } else {
901         sizes[j] = 1;
902         i++;
903       }
904     }
905   }
906   *bs_max = j;
907   PetscFunctionReturn(0);
908 }
909 
910 #undef __FUNC__
911 #define __FUNC__ "MatZeroRows_SeqBAIJ"
912 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
913 {
914   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
915   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
916   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
917   Scalar      zero = 0.0;
918   MatScalar   *aa;
919 
920   PetscFunctionBegin;
921   /* Make a copy of the IS and  sort it */
922   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
923   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
924 
925   /* allocate memory for rows,sizes */
926   rows  = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows);
927   sizes = rows + is_n;
928 
929   /* initialize copy IS valurs to rows, and sort them */
930   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
931   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
932   ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
933   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
934 
935   for ( i=0,j=0; i<bs_max; j+=sizes[i],i++ ) {
936     row   = rows[j];
937     if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row);
938     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
939     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
940     if (sizes[i] == bs) {
941       if (diag) {
942         if (baij->ilen[row/bs] > 0) {
943           baij->ilen[row/bs] = 1;
944           baij->j[baij->i[row/bs]] = row/bs;
945           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
946         }
947         /* Now insert all the diagoanl values for this bs */
948         for ( k=0; k<bs; k++ ) {
949           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
950         }
951       } else { /* (!diag) */
952         baij->ilen[row/bs] = 0;
953       } /* end (!diag) */
954     } else { /* (sizes[i] != bs) */
955 #if defined (PETSC_USE_DEBUG)
956       if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1");
957 #endif
958       for ( k=0; k<count; k++ ) {
959         aa[0] = zero;
960         aa+=bs;
961       }
962       if (diag) {
963         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
964       }
965     }
966   }
967 
968   ierr = PetscFree(rows);CHKERRQ(ierr);
969   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
972 
973 #undef __FUNC__
974 #define __FUNC__ "MatSetValues_SeqBAIJ"
975 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
976 {
977   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
978   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
979   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
980   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
981   int         ridx,cidx,bs2=a->bs2,ierr;
982   MatScalar   *ap,value,*aa=a->a,*bap;
983 
984   PetscFunctionBegin;
985   for ( k=0; k<m; k++ ) { /* loop over added rows */
986     row  = im[k]; brow = row/bs;
987     if (row < 0) continue;
988 #if defined(PETSC_USE_BOPT_g)
989     if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m);
990 #endif
991     rp   = aj + ai[brow];
992     ap   = aa + bs2*ai[brow];
993     rmax = imax[brow];
994     nrow = ailen[brow];
995     low  = 0;
996     for ( l=0; l<n; l++ ) { /* loop over added columns */
997       if (in[l] < 0) continue;
998 #if defined(PETSC_USE_BOPT_g)
999       if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n);
1000 #endif
1001       col = in[l]; bcol = col/bs;
1002       ridx = row % bs; cidx = col % bs;
1003       if (roworiented) {
1004         value = v[l + k*n];
1005       } else {
1006         value = v[k + l*m];
1007       }
1008       if (!sorted) low = 0; high = nrow;
1009       while (high-low > 7) {
1010         t = (low+high)/2;
1011         if (rp[t] > bcol) high = t;
1012         else              low  = t;
1013       }
1014       for ( i=low; i<high; i++ ) {
1015         if (rp[i] > bcol) break;
1016         if (rp[i] == bcol) {
1017           bap  = ap +  bs2*i + bs*cidx + ridx;
1018           if (is == ADD_VALUES) *bap += value;
1019           else                  *bap  = value;
1020           goto noinsert1;
1021         }
1022       }
1023       if (nonew == 1) goto noinsert1;
1024       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1025       if (nrow >= rmax) {
1026         /* there is no extra room in row, therefore enlarge */
1027         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1028         MatScalar *new_a;
1029 
1030         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix");
1031 
1032         /* Malloc new storage space */
1033         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1034         new_a   = (MatScalar *) PetscMalloc( len );CHKPTRQ(new_a);
1035         new_j   = (int *) (new_a + bs2*new_nz);
1036         new_i   = new_j + new_nz;
1037 
1038         /* copy over old data into new slots */
1039         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
1040         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1041         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1042         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1043         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1044         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1045         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1046         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1047         /* free up old matrix storage */
1048         ierr = PetscFree(a->a);CHKERRQ(ierr);
1049         if (!a->singlemalloc) {
1050           ierr = PetscFree(a->i);CHKERRQ(ierr);
1051           ierr = PetscFree(a->j);CHKERRQ(ierr);
1052         }
1053         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1054         a->singlemalloc = 1;
1055 
1056         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1057         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1058         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1059         a->maxnz += bs2*CHUNKSIZE;
1060         a->reallocs++;
1061         a->nz++;
1062       }
1063       N = nrow++ - 1;
1064       /* shift up all the later entries in this row */
1065       for ( ii=N; ii>=i; ii-- ) {
1066         rp[ii+1] = rp[ii];
1067         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1068       }
1069       if (N>=i) {
1070         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1071       }
1072       rp[i]                      = bcol;
1073       ap[bs2*i + bs*cidx + ridx] = value;
1074       noinsert1:;
1075       low = i;
1076     }
1077     ailen[brow] = nrow;
1078   }
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1083 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1084 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1085 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*);
1086 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**);
1087 extern int MatMultTrans_SeqBAIJ(Mat,Vec,Vec);
1088 extern int MatMultTransAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
1089 extern int MatScale_SeqBAIJ(Scalar*,Mat);
1090 extern int MatNorm_SeqBAIJ(Mat,NormType,double *);
1091 extern int MatEqual_SeqBAIJ(Mat,Mat, PetscTruth*);
1092 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec);
1093 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
1094 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
1095 extern int MatZeroEntries_SeqBAIJ(Mat);
1096 
1097 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1098 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1099 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1100 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1101 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1102 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1103 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
1104 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
1105 
1106 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1107 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1108 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1109 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1110 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1111 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1112 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*);
1113 
1114 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec);
1115 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec);
1116 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec);
1117 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec);
1118 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec);
1119 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec);
1120 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec);
1121 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec);
1122 
1123 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
1124 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
1125 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
1126 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
1127 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
1128 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
1129 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
1130 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
1131 
1132 #undef __FUNC__
1133 #define __FUNC__ "MatILUFactor_SeqBAIJ"
1134 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1135 {
1136   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1137   Mat         outA;
1138   int         ierr;
1139   PetscTruth  row_identity, col_identity;
1140 
1141   PetscFunctionBegin;
1142   if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU");
1143   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1144   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1145   if (!row_identity || !col_identity) {
1146     SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU");
1147   }
1148 
1149   outA          = inA;
1150   inA->factor   = FACTOR_LU;
1151   a->row        = row;
1152   a->col        = col;
1153 
1154   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1155   ierr = ISInvertPermutation(col,&(a->icol));CHKERRQ(ierr);
1156   PLogObjectParent(inA,a->icol);
1157 
1158   if (!a->solve_work) {
1159     a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1160     PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar));
1161   }
1162 
1163   if (!a->diag) {
1164     ierr = MatMarkDiag_SeqBAIJ(inA);CHKERRQ(ierr);
1165   }
1166   /*
1167       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1168       for ILU(0) factorization with natural ordering
1169   */
1170   switch (a->bs) {
1171     case 2:
1172     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
1173     inA->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering;
1174     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n");
1175     break;
1176   case 3:
1177     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
1178     inA->ops->solve           = MatSolve_SeqBAIJ_3_NaturalOrdering;
1179     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n");
1180     break;
1181   case 4:
1182     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
1183     inA->ops->solve           = MatSolve_SeqBAIJ_4_NaturalOrdering;
1184     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n");
1185     break;
1186   case 5:
1187     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
1188     inA->ops->solve           = MatSolve_SeqBAIJ_5_NaturalOrdering;
1189     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n");
1190     break;
1191   case 6:
1192     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
1193     inA->ops->solve           = MatSolve_SeqBAIJ_6_NaturalOrdering;
1194     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n");
1195     break;
1196   case 7:
1197     inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
1198     inA->ops->solve           = MatSolve_SeqBAIJ_7_NaturalOrdering;
1199     PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n");
1200     break;
1201   }
1202 
1203   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1204 
1205   PetscFunctionReturn(0);
1206 }
1207 #undef __FUNC__
1208 #define __FUNC__ "MatPrintHelp_SeqBAIJ"
1209 int MatPrintHelp_SeqBAIJ(Mat A)
1210 {
1211   static int called = 0;
1212   MPI_Comm   comm = A->comm;
1213   int        ierr;
1214 
1215   PetscFunctionBegin;
1216   if (called) {PetscFunctionReturn(0);} else called = 1;
1217   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1218   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1219   PetscFunctionReturn(0);
1220 }
1221 
1222 EXTERN_C_BEGIN
1223 #undef __FUNC__
1224 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1225 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1226 {
1227   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1228   int         i,nz,n;
1229 
1230   PetscFunctionBegin;
1231   nz = baij->maxnz;
1232   n  = baij->n;
1233   for (i=0; i<nz; i++) {
1234     baij->j[i] = indices[i];
1235   }
1236   baij->nz = nz;
1237   for ( i=0; i<n; i++ ) {
1238     baij->ilen[i] = baij->imax[i];
1239   }
1240 
1241   PetscFunctionReturn(0);
1242 }
1243 EXTERN_C_END
1244 
1245 #undef __FUNC__
1246 #define __FUNC__ "MatSeqBAIJSetColumnIndices"
1247 /*@
1248     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1249        in the matrix.
1250 
1251   Input Parameters:
1252 +  mat - the SeqBAIJ matrix
1253 -  indices - the column indices
1254 
1255   Level: advanced
1256 
1257   Notes:
1258     This can be called if you have precomputed the nonzero structure of the
1259   matrix and want to provide it to the matrix object to improve the performance
1260   of the MatSetValues() operation.
1261 
1262     You MUST have set the correct numbers of nonzeros per row in the call to
1263   MatCreateSeqBAIJ().
1264 
1265     MUST be called before any calls to MatSetValues();
1266 
1267 @*/
1268 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1269 {
1270   int ierr,(*f)(Mat,int *);
1271 
1272   PetscFunctionBegin;
1273   PetscValidHeaderSpecific(mat,MAT_COOKIE);
1274   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr);
1275   if (f) {
1276     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1277   } else {
1278     SETERRQ(1,1,"Wrong type of matrix to set column indices");
1279   }
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 /* -------------------------------------------------------------------*/
1284 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1285        MatGetRow_SeqBAIJ,
1286        MatRestoreRow_SeqBAIJ,
1287        MatMult_SeqBAIJ_N,
1288        MatMultAdd_SeqBAIJ_N,
1289        MatMultTrans_SeqBAIJ,
1290        MatMultTransAdd_SeqBAIJ,
1291        MatSolve_SeqBAIJ_N,
1292        0,
1293        0,
1294        0,
1295        MatLUFactor_SeqBAIJ,
1296        0,
1297        0,
1298        MatTranspose_SeqBAIJ,
1299        MatGetInfo_SeqBAIJ,
1300        MatEqual_SeqBAIJ,
1301        MatGetDiagonal_SeqBAIJ,
1302        MatDiagonalScale_SeqBAIJ,
1303        MatNorm_SeqBAIJ,
1304        0,
1305        MatAssemblyEnd_SeqBAIJ,
1306        0,
1307        MatSetOption_SeqBAIJ,
1308        MatZeroEntries_SeqBAIJ,
1309        MatZeroRows_SeqBAIJ,
1310        MatLUFactorSymbolic_SeqBAIJ,
1311        MatLUFactorNumeric_SeqBAIJ_N,
1312        0,
1313        0,
1314        MatGetSize_SeqBAIJ,
1315        MatGetSize_SeqBAIJ,
1316        MatGetOwnershipRange_SeqBAIJ,
1317        MatILUFactorSymbolic_SeqBAIJ,
1318        0,
1319        0,
1320        0,
1321        MatDuplicate_SeqBAIJ,
1322        0,
1323        0,
1324        MatILUFactor_SeqBAIJ,
1325        0,
1326        0,
1327        MatGetSubMatrices_SeqBAIJ,
1328        MatIncreaseOverlap_SeqBAIJ,
1329        MatGetValues_SeqBAIJ,
1330        0,
1331        MatPrintHelp_SeqBAIJ,
1332        MatScale_SeqBAIJ,
1333        0,
1334        0,
1335        0,
1336        MatGetBlockSize_SeqBAIJ,
1337        MatGetRowIJ_SeqBAIJ,
1338        MatRestoreRowIJ_SeqBAIJ,
1339        0,
1340        0,
1341        0,
1342        0,
1343        0,
1344        0,
1345        MatSetValuesBlocked_SeqBAIJ,
1346        MatGetSubMatrix_SeqBAIJ,
1347        0,
1348        0,
1349        MatGetMaps_Petsc};
1350 
1351 EXTERN_C_BEGIN
1352 #undef __FUNC__
1353 #define __FUNC__ "MatStoreValues_SeqBAIJ"
1354 int MatStoreValues_SeqBAIJ(Mat mat)
1355 {
1356   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1357   int         nz = aij->i[aij->m]*aij->bs*aij->bs2;
1358   int         ierr;
1359 
1360   PetscFunctionBegin;
1361   if (aij->nonew != 1) {
1362     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1363   }
1364 
1365   /* allocate space for values if not already there */
1366   if (!aij->saved_values) {
1367     aij->saved_values = (Scalar *) PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values);
1368   }
1369 
1370   /* copy values over */
1371   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 EXTERN_C_END
1375 
1376 EXTERN_C_BEGIN
1377 #undef __FUNC__
1378 #define __FUNC__ "MatRetrieveValues_SeqBAIJ"
1379 int MatRetrieveValues_SeqBAIJ(Mat mat)
1380 {
1381   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1382   int         nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr;
1383 
1384   PetscFunctionBegin;
1385   if (aij->nonew != 1) {
1386     SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1387   }
1388   if (!aij->saved_values) {
1389     SETERRQ(1,1,"Must call MatStoreValues(A);first");
1390   }
1391 
1392   /* copy values over */
1393   ierr = PetscMemcpy(aij->a, aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr);
1394   PetscFunctionReturn(0);
1395 }
1396 EXTERN_C_END
1397 
1398 #undef __FUNC__
1399 #define __FUNC__ "MatCreateSeqBAIJ"
1400 /*@C
1401    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1402    compressed row) format.  For good matrix assembly performance the
1403    user should preallocate the matrix storage by setting the parameter nz
1404    (or the array nnz).  By setting these parameters accurately, performance
1405    during matrix assembly can be increased by more than a factor of 50.
1406 
1407    Collective on MPI_Comm
1408 
1409    Input Parameters:
1410 +  comm - MPI communicator, set to PETSC_COMM_SELF
1411 .  bs - size of block
1412 .  m - number of rows
1413 .  n - number of columns
1414 .  nz - number of block nonzeros per block row (same for all rows)
1415 -  nnz - array containing the number of block nonzeros in the various block rows
1416          (possibly different for each block row) or PETSC_NULL
1417 
1418    Output Parameter:
1419 .  A - the matrix
1420 
1421    Options Database Keys:
1422 .   -mat_no_unroll - uses code that does not unroll the loops in the
1423                      block calculations (much slower)
1424 .    -mat_block_size - size of the blocks to use
1425 
1426    Level: intermediate
1427 
1428    Notes:
1429    The block AIJ format is fully compatible with standard Fortran 77
1430    storage.  That is, the stored row and column indices can begin at
1431    either one (as in Fortran) or zero.  See the users' manual for details.
1432 
1433    Specify the preallocated storage with either nz or nnz (not both).
1434    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1435    allocation.  For additional details, see the users manual chapter on
1436    matrices.
1437 
1438 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1439 @*/
1440 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1441 {
1442   Mat         B;
1443   Mat_SeqBAIJ *b;
1444   int         i,len,ierr,flg,mbs,nbs,bs2,size;
1445 
1446   PetscFunctionBegin;
1447   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1448   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1");
1449 
1450   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1451   mbs  = m/bs;
1452   nbs  = n/bs;
1453   bs2  = bs*bs;
1454 
1455   if (mbs*bs!=m || nbs*bs!=n) {
1456     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize");
1457   }
1458 
1459   if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz);
1460   if (nnz) {
1461     for (i=0; i<mbs; i++) {
1462       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1463     }
1464   }
1465 
1466   *A = 0;
1467   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView);
1468   PLogObjectCreate(B);
1469   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b);
1470   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
1471   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1472   ierr    = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
1473   if (!flg) {
1474     switch (bs) {
1475     case 1:
1476       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1477       B->ops->solve           = MatSolve_SeqBAIJ_1;
1478       B->ops->mult            = MatMult_SeqBAIJ_1;
1479       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1480       break;
1481     case 2:
1482       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1483       B->ops->solve           = MatSolve_SeqBAIJ_2;
1484       B->ops->mult            = MatMult_SeqBAIJ_2;
1485       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
1486       break;
1487     case 3:
1488       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1489       B->ops->solve           = MatSolve_SeqBAIJ_3;
1490       B->ops->mult            = MatMult_SeqBAIJ_3;
1491       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
1492       break;
1493     case 4:
1494       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1495       B->ops->solve           = MatSolve_SeqBAIJ_4;
1496       B->ops->mult            = MatMult_SeqBAIJ_4;
1497       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
1498       break;
1499     case 5:
1500       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1501       B->ops->solve           = MatSolve_SeqBAIJ_5;
1502       B->ops->mult            = MatMult_SeqBAIJ_5;
1503       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
1504       break;
1505     case 6:
1506       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
1507       B->ops->solve           = MatSolve_SeqBAIJ_6;
1508       B->ops->mult            = MatMult_SeqBAIJ_6;
1509       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
1510       break;
1511     case 7:
1512       B->ops->mult            = MatMult_SeqBAIJ_7;
1513       B->ops->solve           = MatSolve_SeqBAIJ_7;
1514       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
1515       break;
1516     }
1517   }
1518   B->ops->destroy     = MatDestroy_SeqBAIJ;
1519   B->ops->view        = MatView_SeqBAIJ;
1520   B->factor           = 0;
1521   B->lupivotthreshold = 1.0;
1522   B->mapping          = 0;
1523   b->row              = 0;
1524   b->col              = 0;
1525   b->icol             = 0;
1526   b->reallocs         = 0;
1527   b->saved_values     = 0;
1528 
1529   b->m       = m; B->m = m; B->M = m;
1530   b->n       = n; B->n = n; B->N = n;
1531 
1532   ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr);
1533   ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr);
1534 
1535   b->mbs     = mbs;
1536   b->nbs     = nbs;
1537   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(b->imax);
1538   if (nnz == PETSC_NULL) {
1539     if (nz == PETSC_DEFAULT) nz = 5;
1540     else if (nz <= 0)        nz = 1;
1541     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1542     nz = nz*mbs;
1543   } else {
1544     nz = 0;
1545     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1546   }
1547 
1548   /* allocate the matrix space */
1549   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int);
1550   b->a  = (MatScalar *) PetscMalloc( len );CHKPTRQ(b->a);
1551   ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1552   b->j  = (int *) (b->a + nz*bs2);
1553   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
1554   b->i  = b->j + nz;
1555   b->singlemalloc = 1;
1556 
1557   b->i[0] = 0;
1558   for (i=1; i<mbs+1; i++) {
1559     b->i[i] = b->i[i-1] + b->imax[i-1];
1560   }
1561 
1562   /* b->ilen will count nonzeros in each block row so far. */
1563   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1564   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1565   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1566 
1567   b->bs               = bs;
1568   b->bs2              = bs2;
1569   b->mbs              = mbs;
1570   b->nz               = 0;
1571   b->maxnz            = nz*bs2;
1572   b->sorted           = 0;
1573   b->roworiented      = 1;
1574   b->nonew            = 0;
1575   b->diag             = 0;
1576   b->solve_work       = 0;
1577   b->mult_work        = 0;
1578   b->spptr            = 0;
1579   B->info.nz_unneeded = (double)b->maxnz;
1580 
1581   *A = B;
1582   ierr = OptionsHasName(PETSC_NULL,"-help", &flg);CHKERRQ(ierr);
1583   if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); }
1584 
1585   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
1586                                      "MatStoreValues_SeqBAIJ",
1587                                      (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
1588   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
1589                                      "MatRetrieveValues_SeqBAIJ",
1590                                      (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
1591   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1592                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1593                                      (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
1594   PetscFunctionReturn(0);
1595 }
1596 
1597 #undef __FUNC__
1598 #define __FUNC__ "MatDuplicate_SeqBAIJ"
1599 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1600 {
1601   Mat         C;
1602   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1603   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1604 
1605   PetscFunctionBegin;
1606   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix");
1607 
1608   *B = 0;
1609   PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView);
1610   PLogObjectCreate(C);
1611   C->data         = (void *) (c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c);
1612   ierr            = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1613   C->ops->destroy = MatDestroy_SeqBAIJ;
1614   C->ops->view    = MatView_SeqBAIJ;
1615   C->factor       = A->factor;
1616   c->row          = 0;
1617   c->col          = 0;
1618   c->icol         = 0;
1619   c->saved_values = 0;
1620   C->assembled    = PETSC_TRUE;
1621 
1622   c->m = C->m   = a->m;
1623   c->n = C->n   = a->n;
1624   C->M          = a->m;
1625   C->N          = a->n;
1626 
1627   c->bs         = a->bs;
1628   c->bs2        = a->bs2;
1629   c->mbs        = a->mbs;
1630   c->nbs        = a->nbs;
1631 
1632   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax);
1633   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen);
1634   for ( i=0; i<mbs; i++ ) {
1635     c->imax[i] = a->imax[i];
1636     c->ilen[i] = a->ilen[i];
1637   }
1638 
1639   /* allocate the matrix space */
1640   c->singlemalloc = 1;
1641   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1642   c->a = (MatScalar *) PetscMalloc( len );CHKPTRQ(c->a);
1643   c->j = (int *) (c->a + nz*bs2);
1644   c->i = c->j + nz;
1645   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
1646   if (mbs > 0) {
1647     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
1648     if (cpvalues == MAT_COPY_VALUES) {
1649       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1650     } else {
1651       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
1652     }
1653   }
1654 
1655   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1656   c->sorted      = a->sorted;
1657   c->roworiented = a->roworiented;
1658   c->nonew       = a->nonew;
1659 
1660   if (a->diag) {
1661     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) );CHKPTRQ(c->diag);
1662     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1663     for ( i=0; i<mbs; i++ ) {
1664       c->diag[i] = a->diag[i];
1665     }
1666   } else c->diag        = 0;
1667   c->nz                 = a->nz;
1668   c->maxnz              = a->maxnz;
1669   c->solve_work         = 0;
1670   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1671   c->mult_work          = 0;
1672   *B = C;
1673   ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
1674   PetscFunctionReturn(0);
1675 }
1676 
1677 #undef __FUNC__
1678 #define __FUNC__ "MatLoad_SeqBAIJ"
1679 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1680 {
1681   Mat_SeqBAIJ  *a;
1682   Mat          B;
1683   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1684   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1685   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1686   int          *masked, nmask,tmp,bs2,ishift;
1687   Scalar       *aa;
1688   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1689 
1690   PetscFunctionBegin;
1691   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1692   bs2  = bs*bs;
1693 
1694   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1695   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor");
1696   ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1697   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
1698   if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object");
1699   M = header[1]; N = header[2]; nz = header[3];
1700 
1701   if (header[3] < 0) {
1702     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ");
1703   }
1704 
1705   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
1706 
1707   /*
1708      This code adds extra rows to make sure the number of rows is
1709     divisible by the blocksize
1710   */
1711   mbs        = M/bs;
1712   extra_rows = bs - M + bs*(mbs);
1713   if (extra_rows == bs) extra_rows = 0;
1714   else                  mbs++;
1715   if (extra_rows) {
1716     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1717   }
1718 
1719   /* read in row lengths */
1720   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1721   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1722   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1723 
1724   /* read in column indices */
1725   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) );CHKPTRQ(jj);
1726   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
1727   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1728 
1729   /* loop over row lengths determining block row lengths */
1730   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1731   ierr        = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
1732   mask        = (int *) PetscMalloc( 2*mbs*sizeof(int) );CHKPTRQ(mask);
1733   ierr        = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
1734   masked      = mask + mbs;
1735   rowcount    = 0; nzcount = 0;
1736   for ( i=0; i<mbs; i++ ) {
1737     nmask = 0;
1738     for ( j=0; j<bs; j++ ) {
1739       kmax = rowlengths[rowcount];
1740       for ( k=0; k<kmax; k++ ) {
1741         tmp = jj[nzcount++]/bs;
1742         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1743       }
1744       rowcount++;
1745     }
1746     browlengths[i] += nmask;
1747     /* zero out the mask elements we set */
1748     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1749   }
1750 
1751   /* create our matrix */
1752   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr);
1753   B = *A;
1754   a = (Mat_SeqBAIJ *) B->data;
1755 
1756   /* set matrix "i" values */
1757   a->i[0] = 0;
1758   for ( i=1; i<= mbs; i++ ) {
1759     a->i[i]      = a->i[i-1] + browlengths[i-1];
1760     a->ilen[i-1] = browlengths[i-1];
1761   }
1762   a->nz         = 0;
1763   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1764 
1765   /* read in nonzero values */
1766   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
1767   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
1768   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
1769 
1770   /* set "a" and "j" values into matrix */
1771   nzcount = 0; jcount = 0;
1772   for ( i=0; i<mbs; i++ ) {
1773     nzcountb = nzcount;
1774     nmask    = 0;
1775     for ( j=0; j<bs; j++ ) {
1776       kmax = rowlengths[i*bs+j];
1777       for ( k=0; k<kmax; k++ ) {
1778         tmp = jj[nzcount++]/bs;
1779 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1780       }
1781       rowcount++;
1782     }
1783     /* sort the masked values */
1784     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
1785 
1786     /* set "j" values into matrix */
1787     maskcount = 1;
1788     for ( j=0; j<nmask; j++ ) {
1789       a->j[jcount++]  = masked[j];
1790       mask[masked[j]] = maskcount++;
1791     }
1792     /* set "a" values into matrix */
1793     ishift = bs2*a->i[i];
1794     for ( j=0; j<bs; j++ ) {
1795       kmax = rowlengths[i*bs+j];
1796       for ( k=0; k<kmax; k++ ) {
1797         tmp       = jj[nzcountb]/bs ;
1798         block     = mask[tmp] - 1;
1799         point     = jj[nzcountb] - bs*tmp;
1800         idx       = ishift + bs2*block + j + bs*point;
1801         a->a[idx] = aa[nzcountb++];
1802       }
1803     }
1804     /* zero out the mask elements we set */
1805     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1806   }
1807   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix");
1808 
1809   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1810   ierr = PetscFree(browlengths);CHKERRQ(ierr);
1811   ierr = PetscFree(aa);CHKERRQ(ierr);
1812   ierr = PetscFree(jj);CHKERRQ(ierr);
1813   ierr = PetscFree(mask);CHKERRQ(ierr);
1814 
1815   B->assembled = PETSC_TRUE;
1816 
1817   ierr = MatView_Private(B);CHKERRQ(ierr);
1818   PetscFunctionReturn(0);
1819 }
1820 
1821 
1822 
1823