xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 00a98328c1f4fb9da36b452d9383e7a2e935b046)
1 #ifndef lint
2 static char vcid[] = "$Id: baij.c,v 1.77 1996/12/02 20:13:16 balay Exp bsmith $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "src/mat/impls/baij/seq/baij.h"
10 #include "src/vec/vecimpl.h"
11 #include "src/inline/spops.h"
12 #include "petsc.h"
13 
14 
15 /*
16      Adds diagonal pointers to sparse matrix structure.
17 */
18 
19 int MatMarkDiag_SeqBAIJ(Mat A)
20 {
21   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
22   int         i,j, *diag, m = a->mbs;
23 
24   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
25   PLogObjectMemory(A,(m+1)*sizeof(int));
26   for ( i=0; i<m; i++ ) {
27     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
28       if (a->j[j] == i) {
29         diag[i] = j;
30         break;
31       }
32     }
33   }
34   a->diag = diag;
35   return 0;
36 }
37 
38 #include "draw.h"
39 #include "pinclude/pviewer.h"
40 #include "sys.h"
41 
42 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
43 
44 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
45                             PetscTruth *done)
46 {
47   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
48   int         ierr, n = a->mbs,i;
49 
50   *nn = n;
51   if (!ia) return 0;
52   if (symmetric) {
53     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
54   } else if (oshift == 1) {
55     /* temporarily add 1 to i and j indices */
56     int nz = a->i[n] + 1;
57     for ( i=0; i<nz; i++ ) a->j[i]++;
58     for ( i=0; i<n+1; i++ ) a->i[i]++;
59     *ia = a->i; *ja = a->j;
60   } else {
61     *ia = a->i; *ja = a->j;
62   }
63 
64   return 0;
65 }
66 
67 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
68                                 PetscTruth *done)
69 {
70   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
71   int         i,n = a->mbs;
72 
73   if (!ia) return 0;
74   if (symmetric) {
75     PetscFree(*ia);
76     PetscFree(*ja);
77   } else if (oshift == 1) {
78     int nz = a->i[n];
79     for ( i=0; i<nz; i++ ) a->j[i]--;
80     for ( i=0; i<n+1; i++ ) a->i[i]--;
81   }
82   return 0;
83 }
84 
85 
86 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
87 {
88   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
89   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
90   Scalar      *aa;
91 
92   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
93   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
94   col_lens[0] = MAT_COOKIE;
95 
96   col_lens[1] = a->m;
97   col_lens[2] = a->n;
98   col_lens[3] = a->nz*bs2;
99 
100   /* store lengths of each row and write (including header) to file */
101   count = 0;
102   for ( i=0; i<a->mbs; i++ ) {
103     for ( j=0; j<bs; j++ ) {
104       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
105     }
106   }
107   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
108   PetscFree(col_lens);
109 
110   /* store column indices (zero start index) */
111   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
112   count = 0;
113   for ( i=0; i<a->mbs; i++ ) {
114     for ( j=0; j<bs; j++ ) {
115       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
116         for ( l=0; l<bs; l++ ) {
117           jj[count++] = bs*a->j[k] + l;
118         }
119       }
120     }
121   }
122   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
123   PetscFree(jj);
124 
125   /* store nonzero values */
126   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
127   count = 0;
128   for ( i=0; i<a->mbs; i++ ) {
129     for ( j=0; j<bs; j++ ) {
130       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
131         for ( l=0; l<bs; l++ ) {
132           aa[count++] = a->a[bs2*k + l*bs + j];
133         }
134       }
135     }
136   }
137   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
138   PetscFree(aa);
139   return 0;
140 }
141 
142 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
143 {
144   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
145   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
146   FILE        *fd;
147   char        *outputname;
148 
149   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
150   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
151   ierr = ViewerGetFormat(viewer,&format);
152   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
153     fprintf(fd,"  block size is %d\n",bs);
154   }
155   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
156     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
157   }
158   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
159     for ( i=0; i<a->mbs; i++ ) {
160       for ( j=0; j<bs; j++ ) {
161         fprintf(fd,"row %d:",i*bs+j);
162         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
163           for ( l=0; l<bs; l++ ) {
164 #if defined(PETSC_COMPLEX)
165           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
166             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
167               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
168           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
169             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
170 #else
171           if (a->a[bs2*k + l*bs + j] != 0.0)
172             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
173 #endif
174           }
175         }
176         fprintf(fd,"\n");
177       }
178     }
179   }
180   else {
181     for ( i=0; i<a->mbs; i++ ) {
182       for ( j=0; j<bs; j++ ) {
183         fprintf(fd,"row %d:",i*bs+j);
184         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
185           for ( l=0; l<bs; l++ ) {
186 #if defined(PETSC_COMPLEX)
187           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
188             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
189               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
190           }
191           else {
192             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
193           }
194 #else
195             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
196 #endif
197           }
198         }
199         fprintf(fd,"\n");
200       }
201     }
202   }
203   fflush(fd);
204   return 0;
205 }
206 
207 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
208 {
209   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
210   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
211   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
212   Scalar       *aa;
213   Draw         draw;
214   DrawButton   button;
215   PetscTruth   isnull;
216 
217   ViewerDrawGetDraw(viewer,&draw);
218   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
219 
220   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
221   xr += w;    yr += h;  xl = -w;     yl = -h;
222   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
223   /* loop over matrix elements drawing boxes */
224   color = DRAW_BLUE;
225   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
226     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
227       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
228       x_l = a->j[j]*bs; x_r = x_l + 1.0;
229       aa = a->a + j*bs2;
230       for ( k=0; k<bs; k++ ) {
231         for ( l=0; l<bs; l++ ) {
232           if (PetscReal(*aa++) >=  0.) continue;
233           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
234         }
235       }
236     }
237   }
238   color = DRAW_CYAN;
239   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
240     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
241       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
242       x_l = a->j[j]*bs; x_r = x_l + 1.0;
243       aa = a->a + j*bs2;
244       for ( k=0; k<bs; k++ ) {
245         for ( l=0; l<bs; l++ ) {
246           if (PetscReal(*aa++) != 0.) continue;
247           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
248         }
249       }
250     }
251   }
252 
253   color = DRAW_RED;
254   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
255     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
256       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
257       x_l = a->j[j]*bs; x_r = x_l + 1.0;
258       aa = a->a + j*bs2;
259       for ( k=0; k<bs; k++ ) {
260         for ( l=0; l<bs; l++ ) {
261           if (PetscReal(*aa++) <= 0.) continue;
262           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
263         }
264       }
265     }
266   }
267 
268   DrawFlush(draw);
269   DrawGetPause(draw,&pause);
270   if (pause >= 0) { PetscSleep(pause); return 0;}
271 
272   /* allow the matrix to zoom or shrink */
273   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
274   while (button != BUTTON_RIGHT) {
275     DrawClear(draw);
276     if (button == BUTTON_LEFT) scale = .5;
277     else if (button == BUTTON_CENTER) scale = 2.;
278     xl = scale*(xl + w - xc) + xc - w*scale;
279     xr = scale*(xr - w - xc) + xc + w*scale;
280     yl = scale*(yl + h - yc) + yc - h*scale;
281     yr = scale*(yr - h - yc) + yc + h*scale;
282     w *= scale; h *= scale;
283     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
284     color = DRAW_BLUE;
285     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
286       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
287         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
288         x_l = a->j[j]*bs; x_r = x_l + 1.0;
289         aa = a->a + j*bs2;
290         for ( k=0; k<bs; k++ ) {
291           for ( l=0; l<bs; l++ ) {
292             if (PetscReal(*aa++) >=  0.) continue;
293             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
294           }
295         }
296       }
297     }
298     color = DRAW_CYAN;
299     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
300       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
301         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
302         x_l = a->j[j]*bs; x_r = x_l + 1.0;
303         aa = a->a + j*bs2;
304         for ( k=0; k<bs; k++ ) {
305           for ( l=0; l<bs; l++ ) {
306           if (PetscReal(*aa++) != 0.) continue;
307           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
308           }
309         }
310       }
311     }
312 
313     color = DRAW_RED;
314     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
315       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
316         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
317         x_l = a->j[j]*bs; x_r = x_l + 1.0;
318         aa = a->a + j*bs2;
319         for ( k=0; k<bs; k++ ) {
320           for ( l=0; l<bs; l++ ) {
321             if (PetscReal(*aa++) <= 0.) continue;
322             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
323           }
324         }
325       }
326     }
327     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
328   }
329   return 0;
330 }
331 
332 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
333 {
334   Mat         A = (Mat) obj;
335   ViewerType  vtype;
336   int         ierr;
337 
338   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
339   if (vtype == MATLAB_VIEWER) {
340     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
341   }
342   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
343     return MatView_SeqBAIJ_ASCII(A,viewer);
344   }
345   else if (vtype == BINARY_FILE_VIEWER) {
346     return MatView_SeqBAIJ_Binary(A,viewer);
347   }
348   else if (vtype == DRAW_VIEWER) {
349     return MatView_SeqBAIJ_Draw(A,viewer);
350   }
351   return 0;
352 }
353 
354 #define CHUNKSIZE  10
355 
356 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
357 {
358   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
359   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
360   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
361   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
362   int          ridx,cidx,bs2=a->bs2;
363   Scalar      *ap,value,*aa=a->a,*bap;
364 
365   for ( k=0; k<m; k++ ) { /* loop over added rows */
366     row  = im[k]; brow = row/bs;
367 #if defined(PETSC_BOPT_g)
368     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
369     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
370 #endif
371     rp   = aj + ai[brow];
372     ap   = aa + bs2*ai[brow];
373     rmax = imax[brow];
374     nrow = ailen[brow];
375     low  = 0;
376     for ( l=0; l<n; l++ ) { /* loop over added columns */
377 #if defined(PETSC_BOPT_g)
378       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
379       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
380 #endif
381       col = in[l]; bcol = col/bs;
382       ridx = row % bs; cidx = col % bs;
383       if (roworiented) {
384         value = *v++;
385       }
386       else {
387         value = v[k + l*m];
388       }
389       if (!sorted) low = 0; high = nrow;
390       while (high-low > 7) {
391         t = (low+high)/2;
392         if (rp[t] > bcol) high = t;
393         else              low  = t;
394       }
395       for ( i=low; i<high; i++ ) {
396         if (rp[i] > bcol) break;
397         if (rp[i] == bcol) {
398           bap  = ap +  bs2*i + bs*cidx + ridx;
399           if (is == ADD_VALUES) *bap += value;
400           else                  *bap  = value;
401           goto noinsert;
402         }
403       }
404       if (nonew) goto noinsert;
405       if (nrow >= rmax) {
406         /* there is no extra room in row, therefore enlarge */
407         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
408         Scalar *new_a;
409 
410         /* malloc new storage space */
411         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
412         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
413         new_j   = (int *) (new_a + bs2*new_nz);
414         new_i   = new_j + new_nz;
415 
416         /* copy over old data into new slots */
417         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
418         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
419         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
420         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
421         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
422                                                            len*sizeof(int));
423         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
424         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
425         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
426                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
427         /* free up old matrix storage */
428         PetscFree(a->a);
429         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
430         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
431         a->singlemalloc = 1;
432 
433         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
434         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
435         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
436         a->maxnz += bs2*CHUNKSIZE;
437         a->reallocs++;
438         a->nz++;
439       }
440       N = nrow++ - 1;
441       /* shift up all the later entries in this row */
442       for ( ii=N; ii>=i; ii-- ) {
443         rp[ii+1] = rp[ii];
444         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
445       }
446       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
447       rp[i]                      = bcol;
448       ap[bs2*i + bs*cidx + ridx] = value;
449       noinsert:;
450       low = i;
451     }
452     ailen[brow] = nrow;
453   }
454   return 0;
455 }
456 
457 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
458 {
459   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
460   *m = a->m; *n = a->n;
461   return 0;
462 }
463 
464 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
465 {
466   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
467   *m = 0; *n = a->m;
468   return 0;
469 }
470 
471 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
472 {
473   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
474   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
475   Scalar      *aa,*v_i,*aa_i;
476 
477   bs  = a->bs;
478   ai  = a->i;
479   aj  = a->j;
480   aa  = a->a;
481   bs2 = a->bs2;
482 
483   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
484 
485   bn  = row/bs;   /* Block number */
486   bp  = row % bs; /* Block Position */
487   M   = ai[bn+1] - ai[bn];
488   *nz = bs*M;
489 
490   if (v) {
491     *v = 0;
492     if (*nz) {
493       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
494       for ( i=0; i<M; i++ ) { /* for each block in the block row */
495         v_i  = *v + i*bs;
496         aa_i = aa + bs2*(ai[bn] + i);
497         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
498       }
499     }
500   }
501 
502   if (idx) {
503     *idx = 0;
504     if (*nz) {
505       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
506       for ( i=0; i<M; i++ ) { /* for each block in the block row */
507         idx_i = *idx + i*bs;
508         itmp  = bs*aj[ai[bn] + i];
509         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
510       }
511     }
512   }
513   return 0;
514 }
515 
516 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
517 {
518   if (idx) {if (*idx) PetscFree(*idx);}
519   if (v)   {if (*v)   PetscFree(*v);}
520   return 0;
521 }
522 
523 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
524 {
525   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
526   Mat         C;
527   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
528   int         *rows,*cols,bs2=a->bs2;
529   Scalar      *array=a->a;
530 
531   if (B==PETSC_NULL && mbs!=nbs)
532     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
533   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
534   PetscMemzero(col,(1+nbs)*sizeof(int));
535 
536   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
537   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
538   PetscFree(col);
539   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
540   cols = rows + bs;
541   for ( i=0; i<mbs; i++ ) {
542     cols[0] = i*bs;
543     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
544     len = ai[i+1] - ai[i];
545     for ( j=0; j<len; j++ ) {
546       rows[0] = (*aj++)*bs;
547       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
548       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
549       array += bs2;
550     }
551   }
552   PetscFree(rows);
553 
554   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
555   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
556 
557   if (B != PETSC_NULL) {
558     *B = C;
559   } else {
560     /* This isn't really an in-place transpose */
561     PetscFree(a->a);
562     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
563     if (a->diag) PetscFree(a->diag);
564     if (a->ilen) PetscFree(a->ilen);
565     if (a->imax) PetscFree(a->imax);
566     if (a->solve_work) PetscFree(a->solve_work);
567     PetscFree(a);
568     PetscMemcpy(A,C,sizeof(struct _Mat));
569     PetscHeaderDestroy(C);
570   }
571   return 0;
572 }
573 
574 
575 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
576 {
577   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
578   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
579   int        m = a->m,*ip, N, *ailen = a->ilen;
580   int        mbs = a->mbs, bs2 = a->bs2;
581   Scalar     *aa = a->a, *ap;
582 
583   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
584 
585   for ( i=1; i<mbs; i++ ) {
586     /* move each row back by the amount of empty slots (fshift) before it*/
587     fshift += imax[i-1] - ailen[i-1];
588     if (fshift) {
589       ip = aj + ai[i]; ap = aa + bs2*ai[i];
590       N = ailen[i];
591       for ( j=0; j<N; j++ ) {
592         ip[j-fshift] = ip[j];
593         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
594       }
595     }
596     ai[i] = ai[i-1] + ailen[i-1];
597   }
598   if (mbs) {
599     fshift += imax[mbs-1] - ailen[mbs-1];
600     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
601   }
602   /* reset ilen and imax for each row */
603   for ( i=0; i<mbs; i++ ) {
604     ailen[i] = imax[i] = ai[i+1] - ai[i];
605   }
606   a->nz = ai[mbs];
607 
608   /* diagonals may have moved, so kill the diagonal pointers */
609   if (fshift && a->diag) {
610     PetscFree(a->diag);
611     PLogObjectMemory(A,-(m+1)*sizeof(int));
612     a->diag = 0;
613   }
614   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",
615            m,a->n,a->bs,fshift*bs2,a->nz*bs2);
616   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",
617            a->reallocs);
618   a->reallocs          = 0;
619   A->info.nz_unneeded  = (double)fshift*bs2;
620 
621   return 0;
622 }
623 
624 static int MatZeroEntries_SeqBAIJ(Mat A)
625 {
626   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
627   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
628   return 0;
629 }
630 
631 int MatDestroy_SeqBAIJ(PetscObject obj)
632 {
633   Mat         A  = (Mat) obj;
634   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
635   int         ierr;
636 
637 #if defined(PETSC_LOG)
638   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
639 #endif
640   PetscFree(a->a);
641   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
642   if (a->diag) PetscFree(a->diag);
643   if (a->ilen) PetscFree(a->ilen);
644   if (a->imax) PetscFree(a->imax);
645   if (a->solve_work) PetscFree(a->solve_work);
646   if (a->mult_work) PetscFree(a->mult_work);
647   PetscFree(a);
648   if (A->mapping) {
649     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
650   }
651   PLogObjectDestroy(A);
652   PetscHeaderDestroy(A);
653   return 0;
654 }
655 
656 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
657 {
658   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
659   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
660   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
661   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
662   else if (op == MAT_COLUMNS_UNSORTED)          a->sorted      = 0;
663   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
664   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
665   else if (op == MAT_ROWS_SORTED ||
666            op == MAT_ROWS_UNSORTED ||
667            op == MAT_SYMMETRIC ||
668            op == MAT_STRUCTURALLY_SYMMETRIC ||
669            op == MAT_YES_NEW_DIAGONALS ||
670            op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES)
671     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
672   else if (op == MAT_NO_NEW_DIAGONALS)
673     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");}
674   else
675     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
676   return 0;
677 }
678 
679 
680 /* -------------------------------------------------------*/
681 /* Should check that shapes of vectors and matrices match */
682 /* -------------------------------------------------------*/
683 #include "pinclude/plapack.h"
684 
685 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
686 {
687   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
688   register Scalar *x,*z,*v,sum;
689   int             mbs=a->mbs,i,*idx,*ii,n;
690 
691   VecGetArray_Fast(xx,x);
692   VecGetArray_Fast(zz,z);
693 
694   idx   = a->j;
695   v     = a->a;
696   ii    = a->i;
697 
698   for ( i=0; i<mbs; i++ ) {
699     n    = ii[1] - ii[0]; ii++;
700     sum  = 0.0;
701     while (n--) sum += *v++ * x[*idx++];
702     z[i] = sum;
703   }
704   VecRestoreArray_Fast(xx,x);
705   VecRestoreArray_Fast(zz,z);
706   PLogFlops(2*a->nz - a->m);
707   return 0;
708 }
709 
710 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
711 {
712   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
713   register Scalar *x,*z,*v,*xb,sum1,sum2;
714   register Scalar x1,x2;
715   int             mbs=a->mbs,i,*idx,*ii;
716   int             j,n;
717 
718   VecGetArray_Fast(xx,x);
719   VecGetArray_Fast(zz,z);
720 
721   idx   = a->j;
722   v     = a->a;
723   ii    = a->i;
724 
725   for ( i=0; i<mbs; i++ ) {
726     n  = ii[1] - ii[0]; ii++;
727     sum1 = 0.0; sum2 = 0.0;
728     for ( j=0; j<n; j++ ) {
729       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
730       sum1 += v[0]*x1 + v[2]*x2;
731       sum2 += v[1]*x1 + v[3]*x2;
732       v += 4;
733     }
734     z[0] = sum1; z[1] = sum2;
735     z += 2;
736   }
737   VecRestoreArray_Fast(xx,x);
738   VecRestoreArray_Fast(zz,z);
739   PLogFlops(4*a->nz - a->m);
740   return 0;
741 }
742 
743 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
744 {
745   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
746   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
747   int             mbs=a->mbs,i,*idx,*ii,j,n;
748 
749   VecGetArray_Fast(xx,x);
750   VecGetArray_Fast(zz,z);
751 
752   idx   = a->j;
753   v     = a->a;
754   ii    = a->i;
755 
756   for ( i=0; i<mbs; i++ ) {
757     n  = ii[1] - ii[0]; ii++;
758     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
759     for ( j=0; j<n; j++ ) {
760       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
761       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
762       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
763       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
764       v += 9;
765     }
766     z[0] = sum1; z[1] = sum2; z[2] = sum3;
767     z += 3;
768   }
769   VecRestoreArray_Fast(xx,x);
770   VecRestoreArray_Fast(zz,z);
771   PLogFlops(18*a->nz - a->m);
772   return 0;
773 }
774 
775 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
776 {
777   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
778   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
779   register Scalar x1,x2,x3,x4;
780   int             mbs=a->mbs,i,*idx,*ii;
781   int             j,n;
782 
783   VecGetArray_Fast(xx,x);
784   VecGetArray_Fast(zz,z);
785 
786   idx   = a->j;
787   v     = a->a;
788   ii    = a->i;
789 
790   for ( i=0; i<mbs; i++ ) {
791     n  = ii[1] - ii[0]; ii++;
792     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
793     for ( j=0; j<n; j++ ) {
794       xb = x + 4*(*idx++);
795       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
796       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
797       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
798       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
799       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
800       v += 16;
801     }
802     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
803     z += 4;
804   }
805   VecRestoreArray_Fast(xx,x);
806   VecRestoreArray_Fast(zz,z);
807   PLogFlops(32*a->nz - a->m);
808   return 0;
809 }
810 
811 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
812 {
813   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
814   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
815   register Scalar x1,x2,x3,x4,x5;
816   int             mbs=a->mbs,i,*idx,*ii,j,n;
817 
818   VecGetArray_Fast(xx,x);
819   VecGetArray_Fast(zz,z);
820 
821   idx   = a->j;
822   v     = a->a;
823   ii    = a->i;
824 
825   for ( i=0; i<mbs; i++ ) {
826     n  = ii[1] - ii[0]; ii++;
827     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
828     for ( j=0; j<n; j++ ) {
829       xb = x + 5*(*idx++);
830       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
831       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
832       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
833       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
834       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
835       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
836       v += 25;
837     }
838     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
839     z += 5;
840   }
841   VecRestoreArray_Fast(xx,x);
842   VecRestoreArray_Fast(zz,z);
843   PLogFlops(50*a->nz - a->m);
844   return 0;
845 }
846 
847 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
848 {
849   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
850   register Scalar *x,*z,*v,*xb;
851   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
852   int             _One = 1,ncols,k;
853   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
854 
855   VecGetArray_Fast(xx,x);
856   VecGetArray_Fast(zz,z);
857 
858   idx   = a->j;
859   v     = a->a;
860   ii    = a->i;
861 
862 
863   if (!a->mult_work) {
864     k = PetscMax(a->m,a->n);
865     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
866   }
867   work = a->mult_work;
868   for ( i=0; i<mbs; i++ ) {
869     n     = ii[1] - ii[0]; ii++;
870     ncols = n*bs;
871     workt = work;
872     for ( j=0; j<n; j++ ) {
873       xb = x + bs*(*idx++);
874       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
875       workt += bs;
876     }
877     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
878     v += n*bs2;
879     z += bs;
880   }
881   VecRestoreArray_Fast(xx,x);
882   VecRestoreArray_Fast(zz,z);
883   PLogFlops(2*a->nz*bs2 - a->m);
884   return 0;
885 }
886 
887 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
888 {
889   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
890   register Scalar *x,*y,*z,*v,sum;
891   int             mbs=a->mbs,i,*idx,*ii,n;
892 
893   VecGetArray_Fast(xx,x);
894   VecGetArray_Fast(yy,y);
895   VecGetArray_Fast(zz,z);
896 
897   idx   = a->j;
898   v     = a->a;
899   ii    = a->i;
900 
901   for ( i=0; i<mbs; i++ ) {
902     n    = ii[1] - ii[0]; ii++;
903     sum  = y[i];
904     while (n--) sum += *v++ * x[*idx++];
905     z[i] = sum;
906   }
907   VecRestoreArray_Fast(xx,x);
908   VecRestoreArray_Fast(yy,y);
909   VecRestoreArray_Fast(zz,z);
910   PLogFlops(2*a->nz);
911   return 0;
912 }
913 
914 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
915 {
916   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
917   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
918   register Scalar x1,x2;
919   int             mbs=a->mbs,i,*idx,*ii;
920   int             j,n;
921 
922   VecGetArray_Fast(xx,x);
923   VecGetArray_Fast(yy,y);
924   VecGetArray_Fast(zz,z);
925 
926   idx   = a->j;
927   v     = a->a;
928   ii    = a->i;
929 
930   for ( i=0; i<mbs; i++ ) {
931     n  = ii[1] - ii[0]; ii++;
932     sum1 = y[0]; sum2 = y[1];
933     for ( j=0; j<n; j++ ) {
934       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
935       sum1 += v[0]*x1 + v[2]*x2;
936       sum2 += v[1]*x1 + v[3]*x2;
937       v += 4;
938     }
939     z[0] = sum1; z[1] = sum2;
940     z += 2; y += 2;
941   }
942   VecRestoreArray_Fast(xx,x);
943   VecRestoreArray_Fast(yy,y);
944   VecRestoreArray_Fast(zz,z);
945   PLogFlops(4*a->nz);
946   return 0;
947 }
948 
949 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
950 {
951   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
952   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
953   int             mbs=a->mbs,i,*idx,*ii,j,n;
954 
955   VecGetArray_Fast(xx,x);
956   VecGetArray_Fast(yy,y);
957   VecGetArray_Fast(zz,z);
958 
959   idx   = a->j;
960   v     = a->a;
961   ii    = a->i;
962 
963   for ( i=0; i<mbs; i++ ) {
964     n  = ii[1] - ii[0]; ii++;
965     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
966     for ( j=0; j<n; j++ ) {
967       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
968       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
969       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
970       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
971       v += 9;
972     }
973     z[0] = sum1; z[1] = sum2; z[2] = sum3;
974     z += 3; y += 3;
975   }
976   VecRestoreArray_Fast(xx,x);
977   VecRestoreArray_Fast(yy,y);
978   VecRestoreArray_Fast(zz,z);
979   PLogFlops(18*a->nz);
980   return 0;
981 }
982 
983 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
984 {
985   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
986   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
987   register Scalar x1,x2,x3,x4;
988   int             mbs=a->mbs,i,*idx,*ii;
989   int             j,n;
990 
991   VecGetArray_Fast(xx,x);
992   VecGetArray_Fast(yy,y);
993   VecGetArray_Fast(zz,z);
994 
995   idx   = a->j;
996   v     = a->a;
997   ii    = a->i;
998 
999   for ( i=0; i<mbs; i++ ) {
1000     n  = ii[1] - ii[0]; ii++;
1001     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1002     for ( j=0; j<n; j++ ) {
1003       xb = x + 4*(*idx++);
1004       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1005       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1006       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1007       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1008       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1009       v += 16;
1010     }
1011     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1012     z += 4; y += 4;
1013   }
1014   VecRestoreArray_Fast(xx,x);
1015   VecRestoreArray_Fast(yy,y);
1016   VecRestoreArray_Fast(zz,z);
1017   PLogFlops(32*a->nz);
1018   return 0;
1019 }
1020 
1021 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1022 {
1023   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1024   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1025   register Scalar x1,x2,x3,x4,x5;
1026   int             mbs=a->mbs,i,*idx,*ii,j,n;
1027 
1028   VecGetArray_Fast(xx,x);
1029   VecGetArray_Fast(yy,y);
1030   VecGetArray_Fast(zz,z);
1031 
1032   idx   = a->j;
1033   v     = a->a;
1034   ii    = a->i;
1035 
1036   for ( i=0; i<mbs; i++ ) {
1037     n  = ii[1] - ii[0]; ii++;
1038     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1039     for ( j=0; j<n; j++ ) {
1040       xb = x + 5*(*idx++);
1041       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1042       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1043       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1044       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1045       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1046       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1047       v += 25;
1048     }
1049     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1050     z += 5; y += 5;
1051   }
1052   VecRestoreArray_Fast(xx,x);
1053   VecRestoreArray_Fast(yy,y);
1054   VecRestoreArray_Fast(zz,z);
1055   PLogFlops(50*a->nz);
1056   return 0;
1057 }
1058 
1059 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1060 {
1061   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1062   register Scalar *x,*z,*v,*xb;
1063   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1064   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1065 
1066   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1067 
1068   VecGetArray_Fast(xx,x);
1069   VecGetArray_Fast(zz,z);
1070 
1071   idx   = a->j;
1072   v     = a->a;
1073   ii    = a->i;
1074 
1075 
1076   if (!a->mult_work) {
1077     k = PetscMax(a->m,a->n);
1078     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1079   }
1080   work = a->mult_work;
1081   for ( i=0; i<mbs; i++ ) {
1082     n     = ii[1] - ii[0]; ii++;
1083     ncols = n*bs;
1084     workt = work;
1085     for ( j=0; j<n; j++ ) {
1086       xb = x + bs*(*idx++);
1087       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1088       workt += bs;
1089     }
1090     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1091     v += n*bs2;
1092     z += bs;
1093   }
1094   VecRestoreArray_Fast(xx,x);
1095   VecRestoreArray_Fast(zz,z);
1096   PLogFlops(2*a->nz*bs2 );
1097   return 0;
1098 }
1099 
1100 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1101 {
1102   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1103   Scalar          *xg,*zg,*zb;
1104   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1105   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1106   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1107 
1108 
1109   VecGetArray_Fast(xx,xg); x = xg;
1110   VecGetArray_Fast(zz,zg); z = zg;
1111   PetscMemzero(z,N*sizeof(Scalar));
1112 
1113   idx   = a->j;
1114   v     = a->a;
1115   ii    = a->i;
1116 
1117   switch (bs) {
1118   case 1:
1119     for ( i=0; i<mbs; i++ ) {
1120       n  = ii[1] - ii[0]; ii++;
1121       xb = x + i; x1 = xb[0];
1122       ib = idx + ai[i];
1123       for ( j=0; j<n; j++ ) {
1124         rval    = ib[j];
1125         z[rval] += *v++ * x1;
1126       }
1127     }
1128     break;
1129   case 2:
1130     for ( i=0; i<mbs; i++ ) {
1131       n  = ii[1] - ii[0]; ii++;
1132       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1133       ib = idx + ai[i];
1134       for ( j=0; j<n; j++ ) {
1135         rval      = ib[j]*2;
1136         z[rval++] += v[0]*x1 + v[1]*x2;
1137         z[rval++] += v[2]*x1 + v[3]*x2;
1138         v += 4;
1139       }
1140     }
1141     break;
1142   case 3:
1143     for ( i=0; i<mbs; i++ ) {
1144       n  = ii[1] - ii[0]; ii++;
1145       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1146       ib = idx + ai[i];
1147       for ( j=0; j<n; j++ ) {
1148         rval      = ib[j]*3;
1149         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1150         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1151         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1152         v += 9;
1153       }
1154     }
1155     break;
1156   case 4:
1157     for ( i=0; i<mbs; i++ ) {
1158       n  = ii[1] - ii[0]; ii++;
1159       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1160       ib = idx + ai[i];
1161       for ( j=0; j<n; j++ ) {
1162         rval      = ib[j]*4;
1163         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1164         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1165         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1166         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1167         v += 16;
1168       }
1169     }
1170     break;
1171   case 5:
1172     for ( i=0; i<mbs; i++ ) {
1173       n  = ii[1] - ii[0]; ii++;
1174       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1175       x4 = xb[3];   x5 = xb[4];
1176       ib = idx + ai[i];
1177       for ( j=0; j<n; j++ ) {
1178         rval      = ib[j]*5;
1179         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1180         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1181         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1182         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1183         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1184         v += 25;
1185       }
1186     }
1187     break;
1188       /* block sizes larger then 5 by 5 are handled by BLAS */
1189     default: {
1190       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1191       if (!a->mult_work) {
1192         k = PetscMax(a->m,a->n);
1193         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1194         CHKPTRQ(a->mult_work);
1195       }
1196       work = a->mult_work;
1197       for ( i=0; i<mbs; i++ ) {
1198         n     = ii[1] - ii[0]; ii++;
1199         ncols = n*bs;
1200         PetscMemzero(work,ncols*sizeof(Scalar));
1201         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1202         v += n*bs2;
1203         x += bs;
1204         workt = work;
1205         for ( j=0; j<n; j++ ) {
1206           zb = z + bs*(*idx++);
1207           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1208           workt += bs;
1209         }
1210       }
1211     }
1212   }
1213   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1214   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1215   PLogFlops(2*a->nz*a->bs2 - a->n);
1216   return 0;
1217 }
1218 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1219 {
1220   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1221   Scalar          *xg,*zg,*zb;
1222   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1223   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1224   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1225 
1226 
1227 
1228   VecGetArray_Fast(xx,xg); x = xg;
1229   VecGetArray_Fast(zz,zg); z = zg;
1230 
1231   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1232   else PetscMemzero(z,N*sizeof(Scalar));
1233 
1234 
1235   idx   = a->j;
1236   v     = a->a;
1237   ii    = a->i;
1238 
1239   switch (bs) {
1240   case 1:
1241     for ( i=0; i<mbs; i++ ) {
1242       n  = ii[1] - ii[0]; ii++;
1243       xb = x + i; x1 = xb[0];
1244       ib = idx + ai[i];
1245       for ( j=0; j<n; j++ ) {
1246         rval    = ib[j];
1247         z[rval] += *v++ * x1;
1248       }
1249     }
1250     break;
1251   case 2:
1252     for ( i=0; i<mbs; i++ ) {
1253       n  = ii[1] - ii[0]; ii++;
1254       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1255       ib = idx + ai[i];
1256       for ( j=0; j<n; j++ ) {
1257         rval      = ib[j]*2;
1258         z[rval++] += v[0]*x1 + v[1]*x2;
1259         z[rval++] += v[2]*x1 + v[3]*x2;
1260         v += 4;
1261       }
1262     }
1263     break;
1264   case 3:
1265     for ( i=0; i<mbs; i++ ) {
1266       n  = ii[1] - ii[0]; ii++;
1267       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1268       ib = idx + ai[i];
1269       for ( j=0; j<n; j++ ) {
1270         rval      = ib[j]*3;
1271         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1272         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1273         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1274         v += 9;
1275       }
1276     }
1277     break;
1278   case 4:
1279     for ( i=0; i<mbs; i++ ) {
1280       n  = ii[1] - ii[0]; ii++;
1281       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1282       ib = idx + ai[i];
1283       for ( j=0; j<n; j++ ) {
1284         rval      = ib[j]*4;
1285         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1286         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1287         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1288         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1289         v += 16;
1290       }
1291     }
1292     break;
1293   case 5:
1294     for ( i=0; i<mbs; i++ ) {
1295       n  = ii[1] - ii[0]; ii++;
1296       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1297       x4 = xb[3];   x5 = xb[4];
1298       ib = idx + ai[i];
1299       for ( j=0; j<n; j++ ) {
1300         rval      = ib[j]*5;
1301         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1302         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1303         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1304         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1305         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1306         v += 25;
1307       }
1308     }
1309     break;
1310       /* block sizes larger then 5 by 5 are handled by BLAS */
1311     default: {
1312       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1313       if (!a->mult_work) {
1314         k = PetscMax(a->m,a->n);
1315         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1316         CHKPTRQ(a->mult_work);
1317       }
1318       work = a->mult_work;
1319       for ( i=0; i<mbs; i++ ) {
1320         n     = ii[1] - ii[0]; ii++;
1321         ncols = n*bs;
1322         PetscMemzero(work,ncols*sizeof(Scalar));
1323         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1324         v += n*bs2;
1325         x += bs;
1326         workt = work;
1327         for ( j=0; j<n; j++ ) {
1328           zb = z + bs*(*idx++);
1329           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1330           workt += bs;
1331         }
1332       }
1333     }
1334   }
1335   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1336   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1337   PLogFlops(2*a->nz*a->bs2);
1338   return 0;
1339 }
1340 
1341 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1342 {
1343   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1344 
1345   info->rows_global    = (double)a->m;
1346   info->columns_global = (double)a->n;
1347   info->rows_local     = (double)a->m;
1348   info->columns_local  = (double)a->n;
1349   info->block_size     = a->bs2;
1350   info->nz_allocated   = a->maxnz;
1351   info->nz_used        = a->bs2*a->nz;
1352   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1353   /*  if (info->nz_unneeded != A->info.nz_unneeded)
1354     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
1355   info->assemblies   = A->num_ass;
1356   info->mallocs      = a->reallocs;
1357   info->memory       = A->mem;
1358   if (A->factor) {
1359     info->fill_ratio_given  = A->info.fill_ratio_given;
1360     info->fill_ratio_needed = A->info.fill_ratio_needed;
1361     info->factor_mallocs    = A->info.factor_mallocs;
1362   } else {
1363     info->fill_ratio_given  = 0;
1364     info->fill_ratio_needed = 0;
1365     info->factor_mallocs    = 0;
1366   }
1367   return 0;
1368 }
1369 
1370 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1371 {
1372   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1373 
1374   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
1375 
1376   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1377   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1378       (a->nz != b->nz)) {
1379     *flg = PETSC_FALSE; return 0;
1380   }
1381 
1382   /* if the a->i are the same */
1383   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1384     *flg = PETSC_FALSE; return 0;
1385   }
1386 
1387   /* if a->j are the same */
1388   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1389     *flg = PETSC_FALSE; return 0;
1390   }
1391 
1392   /* if a->a are the same */
1393   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1394     *flg = PETSC_FALSE; return 0;
1395   }
1396   *flg = PETSC_TRUE;
1397   return 0;
1398 
1399 }
1400 
1401 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1402 {
1403   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1404   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1405   Scalar      *x, zero = 0.0,*aa,*aa_j;
1406 
1407   bs   = a->bs;
1408   aa   = a->a;
1409   ai   = a->i;
1410   aj   = a->j;
1411   ambs = a->mbs;
1412   bs2  = a->bs2;
1413 
1414   VecSet(&zero,v);
1415   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1416   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
1417   for ( i=0; i<ambs; i++ ) {
1418     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1419       if (aj[j] == i) {
1420         row  = i*bs;
1421         aa_j = aa+j*bs2;
1422         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1423         break;
1424       }
1425     }
1426   }
1427   return 0;
1428 }
1429 
1430 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1431 {
1432   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1433   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1434   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1435 
1436   ai  = a->i;
1437   aj  = a->j;
1438   aa  = a->a;
1439   m   = a->m;
1440   n   = a->n;
1441   bs  = a->bs;
1442   mbs = a->mbs;
1443   bs2 = a->bs2;
1444   if (ll) {
1445     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1446     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
1447     for ( i=0; i<mbs; i++ ) { /* for each block row */
1448       M  = ai[i+1] - ai[i];
1449       li = l + i*bs;
1450       v  = aa + bs2*ai[i];
1451       for ( j=0; j<M; j++ ) { /* for each block */
1452         for ( k=0; k<bs2; k++ ) {
1453           (*v++) *= li[k%bs];
1454         }
1455       }
1456     }
1457   }
1458 
1459   if (rr) {
1460     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1461     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
1462     for ( i=0; i<mbs; i++ ) { /* for each block row */
1463       M  = ai[i+1] - ai[i];
1464       v  = aa + bs2*ai[i];
1465       for ( j=0; j<M; j++ ) { /* for each block */
1466         ri = r + bs*aj[ai[i]+j];
1467         for ( k=0; k<bs; k++ ) {
1468           x = ri[k];
1469           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1470         }
1471       }
1472     }
1473   }
1474   return 0;
1475 }
1476 
1477 
1478 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1479 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1480 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1481 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1482 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1483 
1484 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1485 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1486 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1487 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1488 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1489 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1490 
1491 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1492 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1493 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1494 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1495 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1496 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1497 
1498 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1499 {
1500   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1501   Scalar      *v = a->a;
1502   double      sum = 0.0;
1503   int         i,nz=a->nz,bs2=a->bs2;
1504 
1505   if (type == NORM_FROBENIUS) {
1506     for (i=0; i< bs2*nz; i++ ) {
1507 #if defined(PETSC_COMPLEX)
1508       sum += real(conj(*v)*(*v)); v++;
1509 #else
1510       sum += (*v)*(*v); v++;
1511 #endif
1512     }
1513     *norm = sqrt(sum);
1514   }
1515   else {
1516     SETERRQ(PETSC_ERR_SUP,"MatNorm_SeqBAIJ:No support for this norm yet");
1517   }
1518   return 0;
1519 }
1520 
1521 /*
1522      note: This can only work for identity for row and col. It would
1523    be good to check this and otherwise generate an error.
1524 */
1525 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1526 {
1527   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1528   Mat         outA;
1529   int         ierr;
1530 
1531   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
1532 
1533   outA          = inA;
1534   inA->factor   = FACTOR_LU;
1535   a->row        = row;
1536   a->col        = col;
1537 
1538   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1539 
1540   if (!a->diag) {
1541     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1542   }
1543   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1544   return 0;
1545 }
1546 
1547 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1548 {
1549   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1550   int         one = 1, totalnz = a->bs2*a->nz;
1551   BLscal_( &totalnz, alpha, a->a, &one );
1552   PLogFlops(totalnz);
1553   return 0;
1554 }
1555 
1556 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1557 {
1558   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1559   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1560   int        *ai = a->i, *ailen = a->ilen;
1561   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1562   Scalar     *ap, *aa = a->a, zero = 0.0;
1563 
1564   for ( k=0; k<m; k++ ) { /* loop over rows */
1565     row  = im[k]; brow = row/bs;
1566     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
1567     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1568     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1569     nrow = ailen[brow];
1570     for ( l=0; l<n; l++ ) { /* loop over columns */
1571       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
1572       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1573       col  = in[l] ;
1574       bcol = col/bs;
1575       cidx = col%bs;
1576       ridx = row%bs;
1577       high = nrow;
1578       low  = 0; /* assume unsorted */
1579       while (high-low > 5) {
1580         t = (low+high)/2;
1581         if (rp[t] > bcol) high = t;
1582         else             low  = t;
1583       }
1584       for ( i=low; i<high; i++ ) {
1585         if (rp[i] > bcol) break;
1586         if (rp[i] == bcol) {
1587           *v++ = ap[bs2*i+bs*cidx+ridx];
1588           goto finished;
1589         }
1590       }
1591       *v++ = zero;
1592       finished:;
1593     }
1594   }
1595   return 0;
1596 }
1597 
1598 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
1599 {
1600   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
1601   *bs = baij->bs;
1602   return 0;
1603 }
1604 
1605 /* idx should be of length atlease bs */
1606 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1607 {
1608   int i,row;
1609   row = idx[0];
1610   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1611 
1612   for ( i=1; i<bs; i++ ) {
1613     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1614   }
1615   *flg = PETSC_TRUE;
1616   return 0;
1617 }
1618 
1619 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1620 {
1621   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1622   IS          is_local;
1623   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1624   PetscTruth  flg;
1625   Scalar      zero = 0.0,*aa;
1626 
1627   /* Make a copy of the IS and  sort it */
1628   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1629   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1630   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1631   ierr = ISSort(is_local); CHKERRQ(ierr);
1632   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1633 
1634   i = 0;
1635   while (i < is_n) {
1636     if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range");
1637     flg = PETSC_FALSE;
1638     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1639     if (flg) { /* There exists a block of rows to be Zerowed */
1640       baij->ilen[rows[i]/bs] = 0;
1641       i += bs;
1642     } else { /* Zero out only the requested row */
1643       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1644       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1645       for ( j=0; j<count; j++ ) {
1646         aa[0] = zero;
1647         aa+=bs;
1648       }
1649       i++;
1650     }
1651   }
1652   if (diag) {
1653     for ( j=0; j<is_n; j++ ) {
1654       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1655     }
1656   }
1657   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1658   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1659   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1660   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1661 
1662   return 0;
1663 }
1664 int MatPrintHelp_SeqBAIJ(Mat A)
1665 {
1666   static int called = 0;
1667   MPI_Comm   comm = A->comm;
1668 
1669   if (called) return 0; else called = 1;
1670   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1671   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1672   return 0;
1673 }
1674 
1675 /* -------------------------------------------------------------------*/
1676 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1677        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1678        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1679        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1680        MatSolve_SeqBAIJ_N,0,
1681        0,0,
1682        MatLUFactor_SeqBAIJ,0,
1683        0,
1684        MatTranspose_SeqBAIJ,
1685        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1686        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1687        0,MatAssemblyEnd_SeqBAIJ,
1688        0,
1689        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
1690        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1691        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1692        MatILUFactorSymbolic_SeqBAIJ,0,
1693        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1694        MatConvertSameType_SeqBAIJ,0,0,
1695        MatILUFactor_SeqBAIJ,0,0,
1696        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
1697        MatGetValues_SeqBAIJ,0,
1698        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
1699        0,0,0,MatGetBlockSize_SeqBAIJ,
1700        MatGetRowIJ_SeqBAIJ,
1701        MatRestoreRowIJ_SeqBAIJ};
1702 
1703 /*@C
1704    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1705    compressed row) format.  For good matrix assembly performance the
1706    user should preallocate the matrix storage by setting the parameter nz
1707    (or the array nzz).  By setting these parameters accurately, performance
1708    during matrix assembly can be increased by more than a factor of 50.
1709 
1710    Input Parameters:
1711 .  comm - MPI communicator, set to MPI_COMM_SELF
1712 .  bs - size of block
1713 .  m - number of rows
1714 .  n - number of columns
1715 .  nz - number of block nonzeros per block row (same for all rows)
1716 .  nzz - array containing the number of block nonzeros in the various block rows
1717          (possibly different for each block row) or PETSC_NULL
1718 
1719    Output Parameter:
1720 .  A - the matrix
1721 
1722    Notes:
1723    The block AIJ format is fully compatible with standard Fortran 77
1724    storage.  That is, the stored row and column indices can begin at
1725    either one (as in Fortran) or zero.  See the users' manual for details.
1726 
1727    Specify the preallocated storage with either nz or nnz (not both).
1728    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1729    allocation.  For additional details, see the users manual chapter on
1730    matrices.
1731 
1732 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1733 @*/
1734 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1735 {
1736   Mat         B;
1737   Mat_SeqBAIJ *b;
1738   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
1739 
1740   MPI_Comm_size(comm,&size);
1741   if (size > 1) SETERRQ(1,"MatCreateSeqBAIJ:Comm must be of size 1");
1742 
1743   if (mbs*bs!=m || nbs*bs!=n)
1744     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
1745 
1746   *A = 0;
1747   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1748   PLogObjectCreate(B);
1749   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1750   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1751   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1752   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1753   if (!flg) {
1754     switch (bs) {
1755       case 1:
1756         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1757         B->ops.solve           = MatSolve_SeqBAIJ_1;
1758         B->ops.mult            = MatMult_SeqBAIJ_1;
1759         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
1760        break;
1761       case 2:
1762         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1763         B->ops.solve           = MatSolve_SeqBAIJ_2;
1764         B->ops.mult            = MatMult_SeqBAIJ_2;
1765         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
1766         break;
1767       case 3:
1768         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1769         B->ops.solve           = MatSolve_SeqBAIJ_3;
1770         B->ops.mult            = MatMult_SeqBAIJ_3;
1771         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
1772         break;
1773       case 4:
1774         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1775         B->ops.solve           = MatSolve_SeqBAIJ_4;
1776         B->ops.mult            = MatMult_SeqBAIJ_4;
1777         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
1778         break;
1779       case 5:
1780         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1781         B->ops.solve           = MatSolve_SeqBAIJ_5;
1782         B->ops.mult            = MatMult_SeqBAIJ_5;
1783         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
1784         break;
1785     }
1786   }
1787   B->destroy          = MatDestroy_SeqBAIJ;
1788   B->view             = MatView_SeqBAIJ;
1789   B->factor           = 0;
1790   B->lupivotthreshold = 1.0;
1791   B->mapping          = 0;
1792   b->row              = 0;
1793   b->col              = 0;
1794   b->reallocs         = 0;
1795 
1796   b->m       = m; B->m = m; B->M = m;
1797   b->n       = n; B->n = n; B->N = n;
1798   b->mbs     = mbs;
1799   b->nbs     = nbs;
1800   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1801   if (nnz == PETSC_NULL) {
1802     if (nz == PETSC_DEFAULT) nz = 5;
1803     else if (nz <= 0)        nz = 1;
1804     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1805     nz = nz*mbs;
1806   }
1807   else {
1808     nz = 0;
1809     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1810   }
1811 
1812   /* allocate the matrix space */
1813   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1814   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1815   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1816   b->j  = (int *) (b->a + nz*bs2);
1817   PetscMemzero(b->j,nz*sizeof(int));
1818   b->i  = b->j + nz;
1819   b->singlemalloc = 1;
1820 
1821   b->i[0] = 0;
1822   for (i=1; i<mbs+1; i++) {
1823     b->i[i] = b->i[i-1] + b->imax[i-1];
1824   }
1825 
1826   /* b->ilen will count nonzeros in each block row so far. */
1827   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1828   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1829   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1830 
1831   b->bs               = bs;
1832   b->bs2              = bs2;
1833   b->mbs              = mbs;
1834   b->nz               = 0;
1835   b->maxnz            = nz*bs2;
1836   b->sorted           = 0;
1837   b->roworiented      = 1;
1838   b->nonew            = 0;
1839   b->diag             = 0;
1840   b->solve_work       = 0;
1841   b->mult_work        = 0;
1842   b->spptr            = 0;
1843   B->info.nz_unneeded = (double)b->maxnz;
1844 
1845   *A = B;
1846   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1847   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1848   return 0;
1849 }
1850 
1851 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1852 {
1853   Mat         C;
1854   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1855   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1856 
1857   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1858 
1859   *B = 0;
1860   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1861   PLogObjectCreate(C);
1862   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1863   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1864   C->destroy    = MatDestroy_SeqBAIJ;
1865   C->view       = MatView_SeqBAIJ;
1866   C->factor     = A->factor;
1867   c->row        = 0;
1868   c->col        = 0;
1869   C->assembled  = PETSC_TRUE;
1870 
1871   c->m = C->m   = a->m;
1872   c->n = C->n   = a->n;
1873   C->M          = a->m;
1874   C->N          = a->n;
1875 
1876   c->bs         = a->bs;
1877   c->bs2        = a->bs2;
1878   c->mbs        = a->mbs;
1879   c->nbs        = a->nbs;
1880 
1881   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1882   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1883   for ( i=0; i<mbs; i++ ) {
1884     c->imax[i] = a->imax[i];
1885     c->ilen[i] = a->ilen[i];
1886   }
1887 
1888   /* allocate the matrix space */
1889   c->singlemalloc = 1;
1890   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1891   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1892   c->j  = (int *) (c->a + nz*bs2);
1893   c->i  = c->j + nz;
1894   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1895   if (mbs > 0) {
1896     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1897     if (cpvalues == COPY_VALUES) {
1898       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1899     }
1900   }
1901 
1902   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1903   c->sorted      = a->sorted;
1904   c->roworiented = a->roworiented;
1905   c->nonew       = a->nonew;
1906 
1907   if (a->diag) {
1908     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1909     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1910     for ( i=0; i<mbs; i++ ) {
1911       c->diag[i] = a->diag[i];
1912     }
1913   }
1914   else c->diag          = 0;
1915   c->nz                 = a->nz;
1916   c->maxnz              = a->maxnz;
1917   c->solve_work         = 0;
1918   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1919   c->mult_work          = 0;
1920   *B = C;
1921   return 0;
1922 }
1923 
1924 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1925 {
1926   Mat_SeqBAIJ  *a;
1927   Mat          B;
1928   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1929   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1930   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1931   int          *masked, nmask,tmp,bs2,ishift;
1932   Scalar       *aa;
1933   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1934 
1935   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1936   bs2  = bs*bs;
1937 
1938   MPI_Comm_size(comm,&size);
1939   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1940   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1941   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1942   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1943   M = header[1]; N = header[2]; nz = header[3];
1944 
1945   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1946 
1947   /*
1948      This code adds extra rows to make sure the number of rows is
1949     divisible by the blocksize
1950   */
1951   mbs        = M/bs;
1952   extra_rows = bs - M + bs*(mbs);
1953   if (extra_rows == bs) extra_rows = 0;
1954   else                  mbs++;
1955   if (extra_rows) {
1956     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1957   }
1958 
1959   /* read in row lengths */
1960   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1961   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1962   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1963 
1964   /* read in column indices */
1965   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1966   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1967   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1968 
1969   /* loop over row lengths determining block row lengths */
1970   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1971   PetscMemzero(browlengths,mbs*sizeof(int));
1972   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1973   PetscMemzero(mask,mbs*sizeof(int));
1974   masked = mask + mbs;
1975   rowcount = 0; nzcount = 0;
1976   for ( i=0; i<mbs; i++ ) {
1977     nmask = 0;
1978     for ( j=0; j<bs; j++ ) {
1979       kmax = rowlengths[rowcount];
1980       for ( k=0; k<kmax; k++ ) {
1981         tmp = jj[nzcount++]/bs;
1982         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1983       }
1984       rowcount++;
1985     }
1986     browlengths[i] += nmask;
1987     /* zero out the mask elements we set */
1988     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1989   }
1990 
1991   /* create our matrix */
1992   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1993          CHKERRQ(ierr);
1994   B = *A;
1995   a = (Mat_SeqBAIJ *) B->data;
1996 
1997   /* set matrix "i" values */
1998   a->i[0] = 0;
1999   for ( i=1; i<= mbs; i++ ) {
2000     a->i[i]      = a->i[i-1] + browlengths[i-1];
2001     a->ilen[i-1] = browlengths[i-1];
2002   }
2003   a->nz         = 0;
2004   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
2005 
2006   /* read in nonzero values */
2007   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
2008   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
2009   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2010 
2011   /* set "a" and "j" values into matrix */
2012   nzcount = 0; jcount = 0;
2013   for ( i=0; i<mbs; i++ ) {
2014     nzcountb = nzcount;
2015     nmask    = 0;
2016     for ( j=0; j<bs; j++ ) {
2017       kmax = rowlengths[i*bs+j];
2018       for ( k=0; k<kmax; k++ ) {
2019         tmp = jj[nzcount++]/bs;
2020 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2021       }
2022       rowcount++;
2023     }
2024     /* sort the masked values */
2025     PetscSortInt(nmask,masked);
2026 
2027     /* set "j" values into matrix */
2028     maskcount = 1;
2029     for ( j=0; j<nmask; j++ ) {
2030       a->j[jcount++]  = masked[j];
2031       mask[masked[j]] = maskcount++;
2032     }
2033     /* set "a" values into matrix */
2034     ishift = bs2*a->i[i];
2035     for ( j=0; j<bs; j++ ) {
2036       kmax = rowlengths[i*bs+j];
2037       for ( k=0; k<kmax; k++ ) {
2038         tmp    = jj[nzcountb]/bs ;
2039         block  = mask[tmp] - 1;
2040         point  = jj[nzcountb] - bs*tmp;
2041         idx    = ishift + bs2*block + j + bs*point;
2042         a->a[idx] = aa[nzcountb++];
2043       }
2044     }
2045     /* zero out the mask elements we set */
2046     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2047   }
2048   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
2049 
2050   PetscFree(rowlengths);
2051   PetscFree(browlengths);
2052   PetscFree(aa);
2053   PetscFree(jj);
2054   PetscFree(mask);
2055 
2056   B->assembled = PETSC_TRUE;
2057 
2058   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2059   if (flg) {
2060     Viewer tviewer;
2061     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2062     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
2063     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2064     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2065   }
2066   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2067   if (flg) {
2068     Viewer tviewer;
2069     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2070     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
2071     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2072     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2073   }
2074   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2075   if (flg) {
2076     Viewer tviewer;
2077     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2078     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2079     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2080   }
2081   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2082   if (flg) {
2083     Viewer tviewer;
2084     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2085     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
2086     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2087     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2088   }
2089   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2090   if (flg) {
2091     Viewer tviewer;
2092     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
2093     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
2094     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
2095     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2096   }
2097   return 0;
2098 }
2099 
2100 
2101 
2102