xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 639f9d9dbbc54d6ac4e42e98283c540b41bb2cee) !
1 #ifndef lint
2 static char vcid[] = "$Id: baij.c,v 1.68 1996/09/14 03:08:32 bsmith Exp bsmith $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "src/mat/impls/baij/seq/baij.h"
10 #include "src/vec/vecimpl.h"
11 #include "src/inline/spops.h"
12 #include "petsc.h"
13 
14 
15 /*
16      Adds diagonal pointers to sparse matrix structure.
17 */
18 
19 int MatMarkDiag_SeqBAIJ(Mat A)
20 {
21   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
22   int         i,j, *diag, m = a->mbs;
23 
24   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
25   PLogObjectMemory(A,(m+1)*sizeof(int));
26   for ( i=0; i<m; i++ ) {
27     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
28       if (a->j[j] == i) {
29         diag[i] = j;
30         break;
31       }
32     }
33   }
34   a->diag = diag;
35   return 0;
36 }
37 
38 #include "draw.h"
39 #include "pinclude/pviewer.h"
40 #include "sys.h"
41 
42 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
43 
44 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
45                             PetscTruth *done)
46 {
47   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
48   int         ierr, n = a->mbs,i;
49 
50   *nn = n;
51   if (!ia) return 0;
52   if (symmetric) {
53     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
54   } else if (oshift == 1) {
55     /* temporarily add 1 to i and j indices */
56     int nz = a->i[n] + 1;
57     for ( i=0; i<nz; i++ ) a->j[i]++;
58     for ( i=0; i<n+1; i++ ) a->i[i]++;
59     *ia = a->i; *ja = a->j;
60   } else {
61     *ia = a->i; *ja = a->j;
62   }
63 
64   return 0;
65 }
66 
67 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
68                                 PetscTruth *done)
69 {
70   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
71   int         i,n = a->mbs;
72 
73   if (!ia) return 0;
74   if (symmetric) {
75     PetscFree(*ia);
76     PetscFree(*ja);
77   }
78     if (oshift == 1) {
79     int nz = a->i[n];
80     for ( i=0; i<nz; i++ ) a->j[i]--;
81     for ( i=0; i<n+1; i++ ) a->i[i]--;
82   }
83   return 0;
84 }
85 
86 
87 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
88 {
89   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
90   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
91   Scalar      *aa;
92 
93   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
94   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
95   col_lens[0] = MAT_COOKIE;
96 
97   col_lens[1] = a->m;
98   col_lens[2] = a->n;
99   col_lens[3] = a->nz*bs2;
100 
101   /* store lengths of each row and write (including header) to file */
102   count = 0;
103   for ( i=0; i<a->mbs; i++ ) {
104     for ( j=0; j<bs; j++ ) {
105       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
106     }
107   }
108   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
109   PetscFree(col_lens);
110 
111   /* store column indices (zero start index) */
112   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
113   count = 0;
114   for ( i=0; i<a->mbs; i++ ) {
115     for ( j=0; j<bs; j++ ) {
116       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
117         for ( l=0; l<bs; l++ ) {
118           jj[count++] = bs*a->j[k] + l;
119         }
120       }
121     }
122   }
123   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
124   PetscFree(jj);
125 
126   /* store nonzero values */
127   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
128   count = 0;
129   for ( i=0; i<a->mbs; i++ ) {
130     for ( j=0; j<bs; j++ ) {
131       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
132         for ( l=0; l<bs; l++ ) {
133           aa[count++] = a->a[bs2*k + l*bs + j];
134         }
135       }
136     }
137   }
138   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
139   PetscFree(aa);
140   return 0;
141 }
142 
143 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
144 {
145   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
146   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
147   FILE        *fd;
148   char        *outputname;
149 
150   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
151   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
152   ierr = ViewerGetFormat(viewer,&format);
153   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
154     fprintf(fd,"  block size is %d\n",bs);
155   }
156   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
157     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
158   }
159   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
160     for ( i=0; i<a->mbs; i++ ) {
161       for ( j=0; j<bs; j++ ) {
162         fprintf(fd,"row %d:",i*bs+j);
163         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
164           for ( l=0; l<bs; l++ ) {
165 #if defined(PETSC_COMPLEX)
166           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
167             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
168               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
169           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
170             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
171 #else
172           if (a->a[bs2*k + l*bs + j] != 0.0)
173             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
174 #endif
175           }
176         }
177         fprintf(fd,"\n");
178       }
179     }
180   }
181   else {
182     for ( i=0; i<a->mbs; i++ ) {
183       for ( j=0; j<bs; j++ ) {
184         fprintf(fd,"row %d:",i*bs+j);
185         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
186           for ( l=0; l<bs; l++ ) {
187 #if defined(PETSC_COMPLEX)
188           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
189             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
190               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
191           }
192           else {
193             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
194           }
195 #else
196             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
197 #endif
198           }
199         }
200         fprintf(fd,"\n");
201       }
202     }
203   }
204   fflush(fd);
205   return 0;
206 }
207 
208 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
209 {
210   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
211   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
212   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
213   Scalar       *aa;
214   Draw         draw;
215   DrawButton   button;
216   PetscTruth   isnull;
217 
218   ViewerDrawGetDraw(viewer,&draw);
219   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
220 
221   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
222   xr += w;    yr += h;  xl = -w;     yl = -h;
223   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
224   /* loop over matrix elements drawing boxes */
225   color = DRAW_BLUE;
226   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
227     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
228       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
229       x_l = a->j[j]*bs; x_r = x_l + 1.0;
230       aa = a->a + j*bs2;
231       for ( k=0; k<bs; k++ ) {
232         for ( l=0; l<bs; l++ ) {
233           if (PetscReal(*aa++) >=  0.) continue;
234           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
235         }
236       }
237     }
238   }
239   color = DRAW_CYAN;
240   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
241     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
242       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
243       x_l = a->j[j]*bs; x_r = x_l + 1.0;
244       aa = a->a + j*bs2;
245       for ( k=0; k<bs; k++ ) {
246         for ( l=0; l<bs; l++ ) {
247           if (PetscReal(*aa++) != 0.) continue;
248           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
249         }
250       }
251     }
252   }
253 
254   color = DRAW_RED;
255   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
256     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
257       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
258       x_l = a->j[j]*bs; x_r = x_l + 1.0;
259       aa = a->a + j*bs2;
260       for ( k=0; k<bs; k++ ) {
261         for ( l=0; l<bs; l++ ) {
262           if (PetscReal(*aa++) <= 0.) continue;
263           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
264         }
265       }
266     }
267   }
268 
269   DrawFlush(draw);
270   DrawGetPause(draw,&pause);
271   if (pause >= 0) { PetscSleep(pause); return 0;}
272 
273   /* allow the matrix to zoom or shrink */
274   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
275   while (button != BUTTON_RIGHT) {
276     DrawClear(draw);
277     if (button == BUTTON_LEFT) scale = .5;
278     else if (button == BUTTON_CENTER) scale = 2.;
279     xl = scale*(xl + w - xc) + xc - w*scale;
280     xr = scale*(xr - w - xc) + xc + w*scale;
281     yl = scale*(yl + h - yc) + yc - h*scale;
282     yr = scale*(yr - h - yc) + yc + h*scale;
283     w *= scale; h *= scale;
284     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
285     color = DRAW_BLUE;
286     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
287       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
288         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
289         x_l = a->j[j]*bs; x_r = x_l + 1.0;
290         aa = a->a + j*bs2;
291         for ( k=0; k<bs; k++ ) {
292           for ( l=0; l<bs; l++ ) {
293             if (PetscReal(*aa++) >=  0.) continue;
294             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
295           }
296         }
297       }
298     }
299     color = DRAW_CYAN;
300     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
301       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
302         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
303         x_l = a->j[j]*bs; x_r = x_l + 1.0;
304         aa = a->a + j*bs2;
305         for ( k=0; k<bs; k++ ) {
306           for ( l=0; l<bs; l++ ) {
307           if (PetscReal(*aa++) != 0.) continue;
308           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
309           }
310         }
311       }
312     }
313 
314     color = DRAW_RED;
315     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
316       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
317         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
318         x_l = a->j[j]*bs; x_r = x_l + 1.0;
319         aa = a->a + j*bs2;
320         for ( k=0; k<bs; k++ ) {
321           for ( l=0; l<bs; l++ ) {
322             if (PetscReal(*aa++) <= 0.) continue;
323             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
324           }
325         }
326       }
327     }
328     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
329   }
330   return 0;
331 }
332 
333 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
334 {
335   Mat         A = (Mat) obj;
336   ViewerType  vtype;
337   int         ierr;
338 
339   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
340   if (vtype == MATLAB_VIEWER) {
341     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
342   }
343   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
344     return MatView_SeqBAIJ_ASCII(A,viewer);
345   }
346   else if (vtype == BINARY_FILE_VIEWER) {
347     return MatView_SeqBAIJ_Binary(A,viewer);
348   }
349   else if (vtype == DRAW_VIEWER) {
350     return MatView_SeqBAIJ_Draw(A,viewer);
351   }
352   return 0;
353 }
354 
355 #define CHUNKSIZE  10
356 
357 /* This version has row oriented v  */
358 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
359 {
360   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
361   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
362   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
363   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
364   int          ridx,cidx,bs2=a->bs2;
365   Scalar      *ap,value,*aa=a->a,*bap;
366 
367   for ( k=0; k<m; k++ ) { /* loop over added rows */
368     row  = im[k]; brow = row/bs;
369 #if defined(PETSC_BOPT_g)
370     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
371     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
372 #endif
373     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
374     rmax = imax[brow]; nrow = ailen[brow];
375     low = 0;
376     for ( l=0; l<n; l++ ) { /* loop over added columns */
377 #if defined(PETSC_BOPT_g)
378       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
379       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
380 #endif
381       col = in[l]; bcol = col/bs;
382       ridx = row % bs; cidx = col % bs;
383       if (roworiented) {
384         value = *v++;
385       }
386       else {
387         value = v[k + l*m];
388       }
389       if (!sorted) low = 0; high = nrow;
390       while (high-low > 5) {
391         t = (low+high)/2;
392         if (rp[t] > bcol) high = t;
393         else              low  = t;
394       }
395       for ( i=low; i<high; i++ ) {
396         if (rp[i] > bcol) break;
397         if (rp[i] == bcol) {
398           bap  = ap +  bs2*i + bs*cidx + ridx;
399           if (is == ADD_VALUES) *bap += value;
400           else                  *bap  = value;
401           goto noinsert;
402         }
403       }
404       if (nonew) goto noinsert;
405       if (nrow >= rmax) {
406         /* there is no extra room in row, therefore enlarge */
407         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
408         Scalar *new_a;
409 
410         /* malloc new storage space */
411         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
412         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
413         new_j   = (int *) (new_a + bs2*new_nz);
414         new_i   = new_j + new_nz;
415 
416         /* copy over old data into new slots */
417         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
418         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
419         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
420         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
421         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
422                                                            len*sizeof(int));
423         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
424         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
425         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
426                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
427         /* free up old matrix storage */
428         PetscFree(a->a);
429         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
430         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
431         a->singlemalloc = 1;
432 
433         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
434         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
435         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
436         a->maxnz += bs2*CHUNKSIZE;
437         a->reallocs++;
438         a->nz++;
439       }
440       N = nrow++ - 1;
441       /* shift up all the later entries in this row */
442       for ( ii=N; ii>=i; ii-- ) {
443         rp[ii+1] = rp[ii];
444         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
445       }
446       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
447       rp[i]                      = bcol;
448       ap[bs2*i + bs*cidx + ridx] = value;
449       noinsert:;
450       low = i;
451     }
452     ailen[brow] = nrow;
453   }
454   return 0;
455 }
456 
457 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
458 {
459   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
460   *m = a->m; *n = a->n;
461   return 0;
462 }
463 
464 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
465 {
466   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
467   *m = 0; *n = a->m;
468   return 0;
469 }
470 
471 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
472 {
473   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
474   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
475   Scalar      *aa,*v_i,*aa_i;
476 
477   bs  = a->bs;
478   ai  = a->i;
479   aj  = a->j;
480   aa  = a->a;
481   bs2 = a->bs2;
482 
483   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
484 
485   bn  = row/bs;   /* Block number */
486   bp  = row % bs; /* Block Position */
487   M   = ai[bn+1] - ai[bn];
488   *nz = bs*M;
489 
490   if (v) {
491     *v = 0;
492     if (*nz) {
493       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
494       for ( i=0; i<M; i++ ) { /* for each block in the block row */
495         v_i  = *v + i*bs;
496         aa_i = aa + bs2*(ai[bn] + i);
497         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
498       }
499     }
500   }
501 
502   if (idx) {
503     *idx = 0;
504     if (*nz) {
505       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
506       for ( i=0; i<M; i++ ) { /* for each block in the block row */
507         idx_i = *idx + i*bs;
508         itmp  = bs*aj[ai[bn] + i];
509         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
510       }
511     }
512   }
513   return 0;
514 }
515 
516 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
517 {
518   if (idx) {if (*idx) PetscFree(*idx);}
519   if (v)   {if (*v)   PetscFree(*v);}
520   return 0;
521 }
522 
523 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
524 {
525   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
526   Mat         C;
527   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
528   int         *rows,*cols,bs2=a->bs2;
529   Scalar      *array=a->a;
530 
531   if (B==PETSC_NULL && mbs!=nbs)
532     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
533   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
534   PetscMemzero(col,(1+nbs)*sizeof(int));
535 
536   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
537   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
538   PetscFree(col);
539   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
540   cols = rows + bs;
541   for ( i=0; i<mbs; i++ ) {
542     cols[0] = i*bs;
543     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
544     len = ai[i+1] - ai[i];
545     for ( j=0; j<len; j++ ) {
546       rows[0] = (*aj++)*bs;
547       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
548       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
549       array += bs2;
550     }
551   }
552   PetscFree(rows);
553 
554   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
555   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
556 
557   if (B != PETSC_NULL) {
558     *B = C;
559   } else {
560     /* This isn't really an in-place transpose */
561     PetscFree(a->a);
562     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
563     if (a->diag) PetscFree(a->diag);
564     if (a->ilen) PetscFree(a->ilen);
565     if (a->imax) PetscFree(a->imax);
566     if (a->solve_work) PetscFree(a->solve_work);
567     PetscFree(a);
568     PetscMemcpy(A,C,sizeof(struct _Mat));
569     PetscHeaderDestroy(C);
570   }
571   return 0;
572 }
573 
574 
575 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
576 {
577   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
578   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
579   int        m = a->m,*ip, N, *ailen = a->ilen;
580   int        mbs = a->mbs, bs2 = a->bs2;
581   Scalar     *aa = a->a, *ap;
582 
583   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
584 
585   for ( i=1; i<mbs; i++ ) {
586     /* move each row back by the amount of empty slots (fshift) before it*/
587     fshift += imax[i-1] - ailen[i-1];
588     if (fshift) {
589       ip = aj + ai[i]; ap = aa + bs2*ai[i];
590       N = ailen[i];
591       for ( j=0; j<N; j++ ) {
592         ip[j-fshift] = ip[j];
593         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
594       }
595     }
596     ai[i] = ai[i-1] + ailen[i-1];
597   }
598   if (mbs) {
599     fshift += imax[mbs-1] - ailen[mbs-1];
600     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
601   }
602   /* reset ilen and imax for each row */
603   for ( i=0; i<mbs; i++ ) {
604     ailen[i] = imax[i] = ai[i+1] - ai[i];
605   }
606   a->nz = ai[mbs];
607 
608   /* diagonals may have moved, so kill the diagonal pointers */
609   if (fshift && a->diag) {
610     PetscFree(a->diag);
611     PLogObjectMemory(A,-(m+1)*sizeof(int));
612     a->diag = 0;
613   }
614   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneed storage space %d used %d, rows %d, block size %d\n",
615            fshift*bs2,a->nz*bs2,m,a->bs);
616   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n",
617            a->reallocs);
618   A->info.nz_unneeded  = (double)fshift*bs2;
619 
620   return 0;
621 }
622 
623 static int MatZeroEntries_SeqBAIJ(Mat A)
624 {
625   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
626   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
627   return 0;
628 }
629 
630 int MatDestroy_SeqBAIJ(PetscObject obj)
631 {
632   Mat         A  = (Mat) obj;
633   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
634 
635 #if defined(PETSC_LOG)
636   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
637 #endif
638   PetscFree(a->a);
639   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
640   if (a->diag) PetscFree(a->diag);
641   if (a->ilen) PetscFree(a->ilen);
642   if (a->imax) PetscFree(a->imax);
643   if (a->solve_work) PetscFree(a->solve_work);
644   if (a->mult_work) PetscFree(a->mult_work);
645   PetscFree(a);
646   PLogObjectDestroy(A);
647   PetscHeaderDestroy(A);
648   return 0;
649 }
650 
651 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
652 {
653   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
654   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
655   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
656   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
657   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
658   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
659   else if (op == MAT_ROWS_SORTED ||
660            op == MAT_SYMMETRIC ||
661            op == MAT_STRUCTURALLY_SYMMETRIC ||
662            op == MAT_YES_NEW_DIAGONALS)
663     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
664   else if (op == MAT_NO_NEW_DIAGONALS)
665     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");}
666   else
667     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
668   return 0;
669 }
670 
671 
672 /* -------------------------------------------------------*/
673 /* Should check that shapes of vectors and matrices match */
674 /* -------------------------------------------------------*/
675 #include "pinclude/plapack.h"
676 
677 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
678 {
679   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
680   register Scalar *x,*z,*v,sum;
681   int             mbs=a->mbs,i,*idx,*ii,n;
682 
683   VecGetArray_Fast(xx,x);
684   VecGetArray_Fast(zz,z);
685 
686   idx   = a->j;
687   v     = a->a;
688   ii    = a->i;
689 
690   for ( i=0; i<mbs; i++ ) {
691     n    = ii[1] - ii[0]; ii++;
692     sum  = 0.0;
693     while (n--) sum += *v++ * x[*idx++];
694     z[i] = sum;
695   }
696   VecRestoreArray_Fast(xx,x);
697   VecRestoreArray_Fast(zz,z);
698   PLogFlops(2*a->nz - a->m);
699   return 0;
700 }
701 
702 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
703 {
704   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
705   register Scalar *x,*z,*v,*xb,sum1,sum2;
706   register Scalar x1,x2;
707   int             mbs=a->mbs,i,*idx,*ii;
708   int             j,n;
709 
710   VecGetArray_Fast(xx,x);
711   VecGetArray_Fast(zz,z);
712 
713   idx   = a->j;
714   v     = a->a;
715   ii    = a->i;
716 
717   for ( i=0; i<mbs; i++ ) {
718     n  = ii[1] - ii[0]; ii++;
719     sum1 = 0.0; sum2 = 0.0;
720     for ( j=0; j<n; j++ ) {
721       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
722       sum1 += v[0]*x1 + v[2]*x2;
723       sum2 += v[1]*x1 + v[3]*x2;
724       v += 4;
725     }
726     z[0] = sum1; z[1] = sum2;
727     z += 2;
728   }
729   VecRestoreArray_Fast(xx,x);
730   VecRestoreArray_Fast(zz,z);
731   PLogFlops(4*a->nz - a->m);
732   return 0;
733 }
734 
735 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
736 {
737   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
738   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
739   int             mbs=a->mbs,i,*idx,*ii,j,n;
740 
741   VecGetArray_Fast(xx,x);
742   VecGetArray_Fast(zz,z);
743 
744   idx   = a->j;
745   v     = a->a;
746   ii    = a->i;
747 
748   for ( i=0; i<mbs; i++ ) {
749     n  = ii[1] - ii[0]; ii++;
750     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
751     for ( j=0; j<n; j++ ) {
752       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
753       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
754       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
755       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
756       v += 9;
757     }
758     z[0] = sum1; z[1] = sum2; z[2] = sum3;
759     z += 3;
760   }
761   VecRestoreArray_Fast(xx,x);
762   VecRestoreArray_Fast(zz,z);
763   PLogFlops(18*a->nz - a->m);
764   return 0;
765 }
766 
767 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
768 {
769   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
770   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
771   register Scalar x1,x2,x3,x4;
772   int             mbs=a->mbs,i,*idx,*ii;
773   int             j,n;
774 
775   VecGetArray_Fast(xx,x);
776   VecGetArray_Fast(zz,z);
777 
778   idx   = a->j;
779   v     = a->a;
780   ii    = a->i;
781 
782   for ( i=0; i<mbs; i++ ) {
783     n  = ii[1] - ii[0]; ii++;
784     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
785     for ( j=0; j<n; j++ ) {
786       xb = x + 4*(*idx++);
787       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
788       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
789       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
790       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
791       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
792       v += 16;
793     }
794     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
795     z += 4;
796   }
797   VecRestoreArray_Fast(xx,x);
798   VecRestoreArray_Fast(zz,z);
799   PLogFlops(32*a->nz - a->m);
800   return 0;
801 }
802 
803 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
804 {
805   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
806   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
807   register Scalar x1,x2,x3,x4,x5;
808   int             mbs=a->mbs,i,*idx,*ii,j,n;
809 
810   VecGetArray_Fast(xx,x);
811   VecGetArray_Fast(zz,z);
812 
813   idx   = a->j;
814   v     = a->a;
815   ii    = a->i;
816 
817   for ( i=0; i<mbs; i++ ) {
818     n  = ii[1] - ii[0]; ii++;
819     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
820     for ( j=0; j<n; j++ ) {
821       xb = x + 5*(*idx++);
822       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
823       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
824       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
825       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
826       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
827       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
828       v += 25;
829     }
830     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
831     z += 5;
832   }
833   VecRestoreArray_Fast(xx,x);
834   VecRestoreArray_Fast(zz,z);
835   PLogFlops(50*a->nz - a->m);
836   return 0;
837 }
838 
839 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
840 {
841   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
842   register Scalar *x,*z,*v,*xb;
843   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
844   int             _One = 1,ncols,k;
845   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
846 
847   VecGetArray_Fast(xx,x);
848   VecGetArray_Fast(zz,z);
849 
850   idx   = a->j;
851   v     = a->a;
852   ii    = a->i;
853 
854 
855   if (!a->mult_work) {
856     k = PetscMax(a->m,a->n);
857     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
858   }
859   work = a->mult_work;
860   for ( i=0; i<mbs; i++ ) {
861     n     = ii[1] - ii[0]; ii++;
862     ncols = n*bs;
863     workt = work;
864     for ( j=0; j<n; j++ ) {
865       xb = x + bs*(*idx++);
866       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
867       workt += bs;
868     }
869     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
870     v += n*bs2;
871     z += bs;
872   }
873   VecRestoreArray_Fast(xx,x);
874   VecRestoreArray_Fast(zz,z);
875   PLogFlops(2*a->nz*bs2 - a->m);
876   return 0;
877 }
878 
879 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
880 {
881   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
882   register Scalar *x,*y,*z,*v,sum;
883   int             mbs=a->mbs,i,*idx,*ii,n;
884 
885   VecGetArray_Fast(xx,x);
886   VecGetArray_Fast(yy,y);
887   VecGetArray_Fast(zz,z);
888 
889   idx   = a->j;
890   v     = a->a;
891   ii    = a->i;
892 
893   for ( i=0; i<mbs; i++ ) {
894     n    = ii[1] - ii[0]; ii++;
895     sum  = y[i];
896     while (n--) sum += *v++ * x[*idx++];
897     z[i] = sum;
898   }
899   VecRestoreArray_Fast(xx,x);
900   VecRestoreArray_Fast(yy,y);
901   VecRestoreArray_Fast(zz,z);
902   PLogFlops(2*a->nz);
903   return 0;
904 }
905 
906 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
907 {
908   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
909   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
910   register Scalar x1,x2;
911   int             mbs=a->mbs,i,*idx,*ii;
912   int             j,n;
913 
914   VecGetArray_Fast(xx,x);
915   VecGetArray_Fast(yy,y);
916   VecGetArray_Fast(zz,z);
917 
918   idx   = a->j;
919   v     = a->a;
920   ii    = a->i;
921 
922   for ( i=0; i<mbs; i++ ) {
923     n  = ii[1] - ii[0]; ii++;
924     sum1 = y[0]; sum2 = y[1];
925     for ( j=0; j<n; j++ ) {
926       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
927       sum1 += v[0]*x1 + v[2]*x2;
928       sum2 += v[1]*x1 + v[3]*x2;
929       v += 4;
930     }
931     z[0] = sum1; z[1] = sum2;
932     z += 2; y += 2;
933   }
934   VecRestoreArray_Fast(xx,x);
935   VecRestoreArray_Fast(yy,y);
936   VecRestoreArray_Fast(zz,z);
937   PLogFlops(4*a->nz);
938   return 0;
939 }
940 
941 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
942 {
943   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
944   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
945   int             mbs=a->mbs,i,*idx,*ii,j,n;
946 
947   VecGetArray_Fast(xx,x);
948   VecGetArray_Fast(yy,y);
949   VecGetArray_Fast(zz,z);
950 
951   idx   = a->j;
952   v     = a->a;
953   ii    = a->i;
954 
955   for ( i=0; i<mbs; i++ ) {
956     n  = ii[1] - ii[0]; ii++;
957     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
958     for ( j=0; j<n; j++ ) {
959       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
960       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
961       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
962       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
963       v += 9;
964     }
965     z[0] = sum1; z[1] = sum2; z[2] = sum3;
966     z += 3; y += 3;
967   }
968   VecRestoreArray_Fast(xx,x);
969   VecRestoreArray_Fast(yy,y);
970   VecRestoreArray_Fast(zz,z);
971   PLogFlops(18*a->nz);
972   return 0;
973 }
974 
975 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
976 {
977   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
978   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
979   register Scalar x1,x2,x3,x4;
980   int             mbs=a->mbs,i,*idx,*ii;
981   int             j,n;
982 
983   VecGetArray_Fast(xx,x);
984   VecGetArray_Fast(yy,y);
985   VecGetArray_Fast(zz,z);
986 
987   idx   = a->j;
988   v     = a->a;
989   ii    = a->i;
990 
991   for ( i=0; i<mbs; i++ ) {
992     n  = ii[1] - ii[0]; ii++;
993     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
994     for ( j=0; j<n; j++ ) {
995       xb = x + 4*(*idx++);
996       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
997       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
998       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
999       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1000       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1001       v += 16;
1002     }
1003     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1004     z += 4; y += 4;
1005   }
1006   VecRestoreArray_Fast(xx,x);
1007   VecRestoreArray_Fast(yy,y);
1008   VecRestoreArray_Fast(zz,z);
1009   PLogFlops(32*a->nz);
1010   return 0;
1011 }
1012 
1013 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1014 {
1015   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1016   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1017   register Scalar x1,x2,x3,x4,x5;
1018   int             mbs=a->mbs,i,*idx,*ii,j,n;
1019 
1020   VecGetArray_Fast(xx,x);
1021   VecGetArray_Fast(yy,y);
1022   VecGetArray_Fast(zz,z);
1023 
1024   idx   = a->j;
1025   v     = a->a;
1026   ii    = a->i;
1027 
1028   for ( i=0; i<mbs; i++ ) {
1029     n  = ii[1] - ii[0]; ii++;
1030     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1031     for ( j=0; j<n; j++ ) {
1032       xb = x + 5*(*idx++);
1033       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1034       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1035       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1036       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1037       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1038       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1039       v += 25;
1040     }
1041     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1042     z += 5; y += 5;
1043   }
1044   VecRestoreArray_Fast(xx,x);
1045   VecRestoreArray_Fast(yy,y);
1046   VecRestoreArray_Fast(zz,z);
1047   PLogFlops(50*a->nz);
1048   return 0;
1049 }
1050 
1051 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1052 {
1053   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1054   register Scalar *x,*z,*v,*xb;
1055   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1056   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1057 
1058   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1059 
1060   VecGetArray_Fast(xx,x);
1061   VecGetArray_Fast(zz,z);
1062 
1063   idx   = a->j;
1064   v     = a->a;
1065   ii    = a->i;
1066 
1067 
1068   if (!a->mult_work) {
1069     k = PetscMax(a->m,a->n);
1070     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1071   }
1072   work = a->mult_work;
1073   for ( i=0; i<mbs; i++ ) {
1074     n     = ii[1] - ii[0]; ii++;
1075     ncols = n*bs;
1076     workt = work;
1077     for ( j=0; j<n; j++ ) {
1078       xb = x + bs*(*idx++);
1079       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1080       workt += bs;
1081     }
1082     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1083     v += n*bs2;
1084     z += bs;
1085   }
1086   VecRestoreArray_Fast(xx,x);
1087   VecRestoreArray_Fast(zz,z);
1088   PLogFlops(2*a->nz*bs2 );
1089   return 0;
1090 }
1091 
1092 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1093 {
1094   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1095   Scalar          *xg,*zg,*zb;
1096   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1097   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1098   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1099 
1100 
1101   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1102   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1103   PetscMemzero(z,N*sizeof(Scalar));
1104 
1105   idx   = a->j;
1106   v     = a->a;
1107   ii    = a->i;
1108 
1109   switch (bs) {
1110   case 1:
1111     for ( i=0; i<mbs; i++ ) {
1112       n  = ii[1] - ii[0]; ii++;
1113       xb = x + i; x1 = xb[0];
1114       ib = idx + ai[i];
1115       for ( j=0; j<n; j++ ) {
1116         rval    = ib[j];
1117         z[rval] += *v++ * x1;
1118       }
1119     }
1120     break;
1121   case 2:
1122     for ( i=0; i<mbs; i++ ) {
1123       n  = ii[1] - ii[0]; ii++;
1124       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1125       ib = idx + ai[i];
1126       for ( j=0; j<n; j++ ) {
1127         rval      = ib[j]*2;
1128         z[rval++] += v[0]*x1 + v[1]*x2;
1129         z[rval++] += v[2]*x1 + v[3]*x2;
1130         v += 4;
1131       }
1132     }
1133     break;
1134   case 3:
1135     for ( i=0; i<mbs; i++ ) {
1136       n  = ii[1] - ii[0]; ii++;
1137       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1138       ib = idx + ai[i];
1139       for ( j=0; j<n; j++ ) {
1140         rval      = ib[j]*3;
1141         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1142         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1143         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1144         v += 9;
1145       }
1146     }
1147     break;
1148   case 4:
1149     for ( i=0; i<mbs; i++ ) {
1150       n  = ii[1] - ii[0]; ii++;
1151       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1152       ib = idx + ai[i];
1153       for ( j=0; j<n; j++ ) {
1154         rval      = ib[j]*4;
1155         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1156         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1157         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1158         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1159         v += 16;
1160       }
1161     }
1162     break;
1163   case 5:
1164     for ( i=0; i<mbs; i++ ) {
1165       n  = ii[1] - ii[0]; ii++;
1166       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1167       x4 = xb[3];   x5 = xb[4];
1168       ib = idx + ai[i];
1169       for ( j=0; j<n; j++ ) {
1170         rval      = ib[j]*5;
1171         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1172         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1173         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1174         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1175         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1176         v += 25;
1177       }
1178     }
1179     break;
1180       /* block sizes larger then 5 by 5 are handled by BLAS */
1181     default: {
1182       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1183       if (!a->mult_work) {
1184         k = PetscMax(a->m,a->n);
1185         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1186         CHKPTRQ(a->mult_work);
1187       }
1188       work = a->mult_work;
1189       for ( i=0; i<mbs; i++ ) {
1190         n     = ii[1] - ii[0]; ii++;
1191         ncols = n*bs;
1192         PetscMemzero(work,ncols*sizeof(Scalar));
1193         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1194         v += n*bs2;
1195         x += bs;
1196         workt = work;
1197         for ( j=0; j<n; j++ ) {
1198           zb = z + bs*(*idx++);
1199           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1200           workt += bs;
1201         }
1202       }
1203     }
1204   }
1205   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1206   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1207   PLogFlops(2*a->nz*a->bs2 - a->n);
1208   return 0;
1209 }
1210 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1211 {
1212   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1213   Scalar          *xg,*zg,*zb;
1214   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1215   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1216   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1217 
1218 
1219 
1220   ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg;
1221   ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg;
1222 
1223   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1224   else PetscMemzero(z,N*sizeof(Scalar));
1225 
1226 
1227   idx   = a->j;
1228   v     = a->a;
1229   ii    = a->i;
1230 
1231   switch (bs) {
1232   case 1:
1233     for ( i=0; i<mbs; i++ ) {
1234       n  = ii[1] - ii[0]; ii++;
1235       xb = x + i; x1 = xb[0];
1236       ib = idx + ai[i];
1237       for ( j=0; j<n; j++ ) {
1238         rval    = ib[j];
1239         z[rval] += *v++ * x1;
1240       }
1241     }
1242     break;
1243   case 2:
1244     for ( i=0; i<mbs; i++ ) {
1245       n  = ii[1] - ii[0]; ii++;
1246       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1247       ib = idx + ai[i];
1248       for ( j=0; j<n; j++ ) {
1249         rval      = ib[j]*2;
1250         z[rval++] += v[0]*x1 + v[1]*x2;
1251         z[rval++] += v[2]*x1 + v[3]*x2;
1252         v += 4;
1253       }
1254     }
1255     break;
1256   case 3:
1257     for ( i=0; i<mbs; i++ ) {
1258       n  = ii[1] - ii[0]; ii++;
1259       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1260       ib = idx + ai[i];
1261       for ( j=0; j<n; j++ ) {
1262         rval      = ib[j]*3;
1263         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1264         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1265         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1266         v += 9;
1267       }
1268     }
1269     break;
1270   case 4:
1271     for ( i=0; i<mbs; i++ ) {
1272       n  = ii[1] - ii[0]; ii++;
1273       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1274       ib = idx + ai[i];
1275       for ( j=0; j<n; j++ ) {
1276         rval      = ib[j]*4;
1277         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1278         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1279         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1280         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1281         v += 16;
1282       }
1283     }
1284     break;
1285   case 5:
1286     for ( i=0; i<mbs; i++ ) {
1287       n  = ii[1] - ii[0]; ii++;
1288       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1289       x4 = xb[3];   x5 = xb[4];
1290       ib = idx + ai[i];
1291       for ( j=0; j<n; j++ ) {
1292         rval      = ib[j]*5;
1293         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1294         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1295         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1296         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1297         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1298         v += 25;
1299       }
1300     }
1301     break;
1302       /* block sizes larger then 5 by 5 are handled by BLAS */
1303     default: {
1304       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1305       if (!a->mult_work) {
1306         k = PetscMax(a->m,a->n);
1307         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1308         CHKPTRQ(a->mult_work);
1309       }
1310       work = a->mult_work;
1311       for ( i=0; i<mbs; i++ ) {
1312         n     = ii[1] - ii[0]; ii++;
1313         ncols = n*bs;
1314         PetscMemzero(work,ncols*sizeof(Scalar));
1315         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1316         v += n*bs2;
1317         x += bs;
1318         workt = work;
1319         for ( j=0; j<n; j++ ) {
1320           zb = z + bs*(*idx++);
1321           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1322           workt += bs;
1323         }
1324       }
1325     }
1326   }
1327   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1328   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1329   PLogFlops(2*a->nz*a->bs2);
1330   return 0;
1331 }
1332 
1333 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1334 {
1335   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1336 
1337   info->rows_global    = (double)a->m;
1338   info->columns_global = (double)a->n;
1339   info->rows_local     = (double)a->m;
1340   info->columns_local  = (double)a->n;
1341   info->block_size     = a->bs2;
1342   info->nz_allocated   = a->maxnz;
1343   info->nz_used        = a->bs2*a->nz;
1344   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1345   /*  if (info->nz_unneeded != A->info.nz_unneeded)
1346     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
1347   info->assemblies   = A->num_ass;
1348   info->mallocs      = a->reallocs;
1349   info->memory       = A->mem;
1350   if (A->factor) {
1351     info->fill_ratio_given  = A->info.fill_ratio_given;
1352     info->fill_ratio_needed = A->info.fill_ratio_needed;
1353     info->factor_mallocs    = A->info.factor_mallocs;
1354   } else {
1355     info->fill_ratio_given  = 0;
1356     info->fill_ratio_needed = 0;
1357     info->factor_mallocs    = 0;
1358   }
1359   return 0;
1360 }
1361 
1362 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1363 {
1364   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1365 
1366   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
1367 
1368   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1369   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1370       (a->nz != b->nz)) {
1371     *flg = PETSC_FALSE; return 0;
1372   }
1373 
1374   /* if the a->i are the same */
1375   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1376     *flg = PETSC_FALSE; return 0;
1377   }
1378 
1379   /* if a->j are the same */
1380   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1381     *flg = PETSC_FALSE; return 0;
1382   }
1383 
1384   /* if a->a are the same */
1385   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1386     *flg = PETSC_FALSE; return 0;
1387   }
1388   *flg = PETSC_TRUE;
1389   return 0;
1390 
1391 }
1392 
1393 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1394 {
1395   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1396   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1397   Scalar      *x, zero = 0.0,*aa,*aa_j;
1398 
1399   bs   = a->bs;
1400   aa   = a->a;
1401   ai   = a->i;
1402   aj   = a->j;
1403   ambs = a->mbs;
1404   bs2  = a->bs2;
1405 
1406   VecSet(&zero,v);
1407   VecGetArray(v,&x); VecGetLocalSize(v,&n);
1408   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
1409   for ( i=0; i<ambs; i++ ) {
1410     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1411       if (aj[j] == i) {
1412         row  = i*bs;
1413         aa_j = aa+j*bs2;
1414         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1415         break;
1416       }
1417     }
1418   }
1419   return 0;
1420 }
1421 
1422 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1423 {
1424   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1425   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1426   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1427 
1428   ai  = a->i;
1429   aj  = a->j;
1430   aa  = a->a;
1431   m   = a->m;
1432   n   = a->n;
1433   bs  = a->bs;
1434   mbs = a->mbs;
1435   bs2 = a->bs2;
1436   if (ll) {
1437     VecGetArray(ll,&l); VecGetSize(ll,&lm);
1438     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
1439     for ( i=0; i<mbs; i++ ) { /* for each block row */
1440       M  = ai[i+1] - ai[i];
1441       li = l + i*bs;
1442       v  = aa + bs2*ai[i];
1443       for ( j=0; j<M; j++ ) { /* for each block */
1444         for ( k=0; k<bs2; k++ ) {
1445           (*v++) *= li[k%bs];
1446         }
1447       }
1448     }
1449   }
1450 
1451   if (rr) {
1452     VecGetArray(rr,&r); VecGetSize(rr,&rn);
1453     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
1454     for ( i=0; i<mbs; i++ ) { /* for each block row */
1455       M  = ai[i+1] - ai[i];
1456       v  = aa + bs2*ai[i];
1457       for ( j=0; j<M; j++ ) { /* for each block */
1458         ri = r + bs*aj[ai[i]+j];
1459         for ( k=0; k<bs; k++ ) {
1460           x = ri[k];
1461           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1462         }
1463       }
1464     }
1465   }
1466   return 0;
1467 }
1468 
1469 
1470 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1471 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1472 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1473 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1474 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1475 
1476 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1477 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1478 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1479 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1480 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1481 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1482 
1483 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1484 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1485 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1486 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1487 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1488 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1489 
1490 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1491 {
1492   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1493   Scalar      *v = a->a;
1494   double      sum = 0.0;
1495   int         i,nz=a->nz,bs2=a->bs2;
1496 
1497   if (type == NORM_FROBENIUS) {
1498     for (i=0; i< bs2*nz; i++ ) {
1499 #if defined(PETSC_COMPLEX)
1500       sum += real(conj(*v)*(*v)); v++;
1501 #else
1502       sum += (*v)*(*v); v++;
1503 #endif
1504     }
1505     *norm = sqrt(sum);
1506   }
1507   else {
1508     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
1509   }
1510   return 0;
1511 }
1512 
1513 /*
1514      note: This can only work for identity for row and col. It would
1515    be good to check this and otherwise generate an error.
1516 */
1517 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1518 {
1519   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1520   Mat         outA;
1521   int         ierr;
1522 
1523   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
1524 
1525   outA          = inA;
1526   inA->factor   = FACTOR_LU;
1527   a->row        = row;
1528   a->col        = col;
1529 
1530   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1531 
1532   if (!a->diag) {
1533     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1534   }
1535   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1536   return 0;
1537 }
1538 
1539 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1540 {
1541   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1542   int         one = 1, totalnz = a->bs2*a->nz;
1543   BLscal_( &totalnz, alpha, a->a, &one );
1544   PLogFlops(totalnz);
1545   return 0;
1546 }
1547 
1548 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1549 {
1550   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1551   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1552   int        *ai = a->i, *ailen = a->ilen;
1553   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1554   Scalar     *ap, *aa = a->a, zero = 0.0;
1555 
1556   for ( k=0; k<m; k++ ) { /* loop over rows */
1557     row  = im[k]; brow = row/bs;
1558     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
1559     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1560     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1561     nrow = ailen[brow];
1562     for ( l=0; l<n; l++ ) { /* loop over columns */
1563       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
1564       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1565       col  = in[l] ;
1566       bcol = col/bs;
1567       cidx = col%bs;
1568       ridx = row%bs;
1569       high = nrow;
1570       low  = 0; /* assume unsorted */
1571       while (high-low > 5) {
1572         t = (low+high)/2;
1573         if (rp[t] > bcol) high = t;
1574         else             low  = t;
1575       }
1576       for ( i=low; i<high; i++ ) {
1577         if (rp[i] > bcol) break;
1578         if (rp[i] == bcol) {
1579           *v++ = ap[bs2*i+bs*cidx+ridx];
1580           goto finished;
1581         }
1582       }
1583       *v++ = zero;
1584       finished:;
1585     }
1586   }
1587   return 0;
1588 }
1589 
1590 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
1591 {
1592   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
1593   *bs = baij->bs;
1594   return 0;
1595 }
1596 
1597 /* idx should be of length atlease bs */
1598 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1599 {
1600   int i,row;
1601   row = idx[0];
1602   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1603 
1604   for ( i=1; i<bs; i++ ) {
1605     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1606   }
1607   *flg = PETSC_TRUE;
1608   return 0;
1609 }
1610 
1611 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1612 {
1613   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1614   IS          is_local;
1615   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1616   PetscTruth  flg;
1617   Scalar      zero = 0.0,*aa;
1618 
1619   /* Make a copy of the IS and  sort it */
1620   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1621   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1622   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1623   ierr = ISSort(is_local); CHKERRQ(ierr);
1624   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1625 
1626   i = 0;
1627   while (i < is_n) {
1628     if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range");
1629     flg = PETSC_FALSE;
1630     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1631     if (flg) { /* There exists a block of rows to be Zerowed */
1632       baij->ilen[rows[i]/bs] = 0;
1633       i += bs;
1634     } else { /* Zero out only the requested row */
1635       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1636       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1637       for ( j=0; j<count; j++ ) {
1638         aa[0] = zero;
1639         aa+=bs;
1640       }
1641       i++;
1642     }
1643   }
1644   if (diag) {
1645     for ( j=0; j<is_n; j++ ) {
1646       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1647     }
1648   }
1649   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1650   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1651   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1652   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1653   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1654 
1655   return 0;
1656 }
1657 int MatPrintHelp_SeqBAIJ(Mat A)
1658 {
1659   static int called = 0;
1660   MPI_Comm   comm = A->comm;
1661 
1662   if (called) return 0; else called = 1;
1663   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1664   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1665   return 0;
1666 }
1667 
1668 /* -------------------------------------------------------------------*/
1669 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1670        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1671        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1672        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1673        MatSolve_SeqBAIJ_N,0,
1674        0,0,
1675        MatLUFactor_SeqBAIJ,0,
1676        0,
1677        MatTranspose_SeqBAIJ,
1678        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1679        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1680        0,MatAssemblyEnd_SeqBAIJ,
1681        0,
1682        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
1683        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1684        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1685        MatILUFactorSymbolic_SeqBAIJ,0,
1686        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1687        MatConvertSameType_SeqBAIJ,0,0,
1688        MatILUFactor_SeqBAIJ,0,0,
1689        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
1690        MatGetValues_SeqBAIJ,0,
1691        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
1692        0,0,0,MatGetBlockSize_SeqBAIJ,
1693        MatGetRowIJ_SeqBAIJ,
1694        MatRestoreRowIJ_SeqBAIJ};
1695 
1696 /*@C
1697    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1698    compressed row) format.  For good matrix assembly performance the
1699    user should preallocate the matrix storage by setting the parameter nz
1700    (or the array nzz).  By setting these parameters accurately, performance
1701    during matrix assembly can be increased by more than a factor of 50.
1702 
1703    Input Parameters:
1704 .  comm - MPI communicator, set to MPI_COMM_SELF
1705 .  bs - size of block
1706 .  m - number of rows
1707 .  n - number of columns
1708 .  nz - number of block nonzeros per block row (same for all rows)
1709 .  nzz - array containing the number of block nonzeros in the various block rows
1710          (possibly different for each block row) or PETSC_NULL
1711 
1712    Output Parameter:
1713 .  A - the matrix
1714 
1715    Notes:
1716    The block AIJ format is fully compatible with standard Fortran 77
1717    storage.  That is, the stored row and column indices can begin at
1718    either one (as in Fortran) or zero.  See the users' manual for details.
1719 
1720    Specify the preallocated storage with either nz or nnz (not both).
1721    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1722    allocation.  For additional details, see the users manual chapter on
1723    matrices and the file $(PETSC_DIR)/Performance.
1724 
1725 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1726 @*/
1727 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1728 {
1729   Mat         B;
1730   Mat_SeqBAIJ *b;
1731   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
1732 
1733   MPI_Comm_size(comm,&size);
1734   if (size > 1) SETERRQ(1,"MatCreateSeqBAIJ:Comm must be of size 1");
1735 
1736   if (mbs*bs!=m || nbs*bs!=n)
1737     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
1738 
1739   *A = 0;
1740   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1741   PLogObjectCreate(B);
1742   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1743   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1744   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1745   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1746   if (!flg) {
1747     switch (bs) {
1748       case 1:
1749         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1750         B->ops.solve           = MatSolve_SeqBAIJ_1;
1751         B->ops.mult            = MatMult_SeqBAIJ_1;
1752         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
1753        break;
1754       case 2:
1755         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1756         B->ops.solve           = MatSolve_SeqBAIJ_2;
1757         B->ops.mult            = MatMult_SeqBAIJ_2;
1758         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
1759         break;
1760       case 3:
1761         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1762         B->ops.solve           = MatSolve_SeqBAIJ_3;
1763         B->ops.mult            = MatMult_SeqBAIJ_3;
1764         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
1765         break;
1766       case 4:
1767         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1768         B->ops.solve           = MatSolve_SeqBAIJ_4;
1769         B->ops.mult            = MatMult_SeqBAIJ_4;
1770         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
1771         break;
1772       case 5:
1773         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1774         B->ops.solve           = MatSolve_SeqBAIJ_5;
1775         B->ops.mult            = MatMult_SeqBAIJ_5;
1776         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
1777         break;
1778     }
1779   }
1780   B->destroy          = MatDestroy_SeqBAIJ;
1781   B->view             = MatView_SeqBAIJ;
1782   B->factor           = 0;
1783   B->lupivotthreshold = 1.0;
1784   b->row              = 0;
1785   b->col              = 0;
1786   b->reallocs         = 0;
1787 
1788   b->m       = m; B->m = m; B->M = m;
1789   b->n       = n; B->n = n; B->N = n;
1790   b->mbs     = mbs;
1791   b->nbs     = nbs;
1792   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1793   if (nnz == PETSC_NULL) {
1794     if (nz == PETSC_DEFAULT) nz = 5;
1795     else if (nz <= 0)        nz = 1;
1796     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1797     nz = nz*mbs;
1798   }
1799   else {
1800     nz = 0;
1801     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1802   }
1803 
1804   /* allocate the matrix space */
1805   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1806   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1807   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1808   b->j  = (int *) (b->a + nz*bs2);
1809   PetscMemzero(b->j,nz*sizeof(int));
1810   b->i  = b->j + nz;
1811   b->singlemalloc = 1;
1812 
1813   b->i[0] = 0;
1814   for (i=1; i<mbs+1; i++) {
1815     b->i[i] = b->i[i-1] + b->imax[i-1];
1816   }
1817 
1818   /* b->ilen will count nonzeros in each block row so far. */
1819   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1820   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1821   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1822 
1823   b->bs               = bs;
1824   b->bs2              = bs2;
1825   b->mbs              = mbs;
1826   b->nz               = 0;
1827   b->maxnz            = nz*bs2;
1828   b->sorted           = 0;
1829   b->roworiented      = 1;
1830   b->nonew            = 0;
1831   b->diag             = 0;
1832   b->solve_work       = 0;
1833   b->mult_work        = 0;
1834   b->spptr            = 0;
1835   B->info.nz_unneeded = (double)b->maxnz;
1836 
1837   *A = B;
1838   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1839   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1840   return 0;
1841 }
1842 
1843 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1844 {
1845   Mat         C;
1846   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1847   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1848 
1849   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1850 
1851   *B = 0;
1852   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1853   PLogObjectCreate(C);
1854   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1855   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1856   C->destroy    = MatDestroy_SeqBAIJ;
1857   C->view       = MatView_SeqBAIJ;
1858   C->factor     = A->factor;
1859   c->row        = 0;
1860   c->col        = 0;
1861   C->assembled  = PETSC_TRUE;
1862 
1863   c->m = C->m   = a->m;
1864   c->n = C->n   = a->n;
1865   C->M          = a->m;
1866   C->N          = a->n;
1867 
1868   c->bs         = a->bs;
1869   c->bs2        = a->bs2;
1870   c->mbs        = a->mbs;
1871   c->nbs        = a->nbs;
1872 
1873   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1874   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1875   for ( i=0; i<mbs; i++ ) {
1876     c->imax[i] = a->imax[i];
1877     c->ilen[i] = a->ilen[i];
1878   }
1879 
1880   /* allocate the matrix space */
1881   c->singlemalloc = 1;
1882   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1883   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1884   c->j  = (int *) (c->a + nz*bs2);
1885   c->i  = c->j + nz;
1886   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1887   if (mbs > 0) {
1888     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1889     if (cpvalues == COPY_VALUES) {
1890       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1891     }
1892   }
1893 
1894   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1895   c->sorted      = a->sorted;
1896   c->roworiented = a->roworiented;
1897   c->nonew       = a->nonew;
1898 
1899   if (a->diag) {
1900     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1901     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1902     for ( i=0; i<mbs; i++ ) {
1903       c->diag[i] = a->diag[i];
1904     }
1905   }
1906   else c->diag          = 0;
1907   c->nz                 = a->nz;
1908   c->maxnz              = a->maxnz;
1909   c->solve_work         = 0;
1910   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1911   c->mult_work          = 0;
1912   *B = C;
1913   return 0;
1914 }
1915 
1916 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1917 {
1918   Mat_SeqBAIJ  *a;
1919   Mat          B;
1920   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1921   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1922   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1923   int          *masked, nmask,tmp,bs2,ishift;
1924   Scalar       *aa;
1925   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1926 
1927   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1928   bs2  = bs*bs;
1929 
1930   MPI_Comm_size(comm,&size);
1931   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1932   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1933   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1934   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1935   M = header[1]; N = header[2]; nz = header[3];
1936 
1937   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1938 
1939   /*
1940      This code adds extra rows to make sure the number of rows is
1941     divisible by the blocksize
1942   */
1943   mbs        = M/bs;
1944   extra_rows = bs - M + bs*(mbs);
1945   if (extra_rows == bs) extra_rows = 0;
1946   else                  mbs++;
1947   if (extra_rows) {
1948     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1949   }
1950 
1951   /* read in row lengths */
1952   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1953   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1954   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1955 
1956   /* read in column indices */
1957   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1958   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1959   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1960 
1961   /* loop over row lengths determining block row lengths */
1962   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1963   PetscMemzero(browlengths,mbs*sizeof(int));
1964   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1965   PetscMemzero(mask,mbs*sizeof(int));
1966   masked = mask + mbs;
1967   rowcount = 0; nzcount = 0;
1968   for ( i=0; i<mbs; i++ ) {
1969     nmask = 0;
1970     for ( j=0; j<bs; j++ ) {
1971       kmax = rowlengths[rowcount];
1972       for ( k=0; k<kmax; k++ ) {
1973         tmp = jj[nzcount++]/bs;
1974         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1975       }
1976       rowcount++;
1977     }
1978     browlengths[i] += nmask;
1979     /* zero out the mask elements we set */
1980     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1981   }
1982 
1983   /* create our matrix */
1984   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1985          CHKERRQ(ierr);
1986   B = *A;
1987   a = (Mat_SeqBAIJ *) B->data;
1988 
1989   /* set matrix "i" values */
1990   a->i[0] = 0;
1991   for ( i=1; i<= mbs; i++ ) {
1992     a->i[i]      = a->i[i-1] + browlengths[i-1];
1993     a->ilen[i-1] = browlengths[i-1];
1994   }
1995   a->nz         = 0;
1996   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
1997 
1998   /* read in nonzero values */
1999   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
2000   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
2001   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2002 
2003   /* set "a" and "j" values into matrix */
2004   nzcount = 0; jcount = 0;
2005   for ( i=0; i<mbs; i++ ) {
2006     nzcountb = nzcount;
2007     nmask    = 0;
2008     for ( j=0; j<bs; j++ ) {
2009       kmax = rowlengths[i*bs+j];
2010       for ( k=0; k<kmax; k++ ) {
2011         tmp = jj[nzcount++]/bs;
2012 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2013       }
2014       rowcount++;
2015     }
2016     /* sort the masked values */
2017     PetscSortInt(nmask,masked);
2018 
2019     /* set "j" values into matrix */
2020     maskcount = 1;
2021     for ( j=0; j<nmask; j++ ) {
2022       a->j[jcount++]  = masked[j];
2023       mask[masked[j]] = maskcount++;
2024     }
2025     /* set "a" values into matrix */
2026     ishift = bs2*a->i[i];
2027     for ( j=0; j<bs; j++ ) {
2028       kmax = rowlengths[i*bs+j];
2029       for ( k=0; k<kmax; k++ ) {
2030         tmp    = jj[nzcountb]/bs ;
2031         block  = mask[tmp] - 1;
2032         point  = jj[nzcountb] - bs*tmp;
2033         idx    = ishift + bs2*block + j + bs*point;
2034         a->a[idx] = aa[nzcountb++];
2035       }
2036     }
2037     /* zero out the mask elements we set */
2038     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2039   }
2040   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
2041 
2042   PetscFree(rowlengths);
2043   PetscFree(browlengths);
2044   PetscFree(aa);
2045   PetscFree(jj);
2046   PetscFree(mask);
2047 
2048   B->assembled = PETSC_TRUE;
2049 
2050   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2051   if (flg) {
2052     Viewer tviewer;
2053     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2054     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
2055     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2056     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2057   }
2058   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2059   if (flg) {
2060     Viewer tviewer;
2061     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2062     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
2063     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2064     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2065   }
2066   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2067   if (flg) {
2068     Viewer tviewer;
2069     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2070     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2071     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2072   }
2073   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2074   if (flg) {
2075     Viewer tviewer;
2076     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2077     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
2078     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2079     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2080   }
2081   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2082   if (flg) {
2083     Viewer tviewer;
2084     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
2085     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
2086     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
2087     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2088   }
2089   return 0;
2090 }
2091 
2092 
2093 
2094