xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 90f02eec332fcca4c33b4e7b21cabed76bf0614e)
1 #ifndef lint
2 static char vcid[] = "$Id: baij.c,v 1.70 1996/11/13 21:20:56 bsmith Exp bsmith $";
3 #endif
4 
5 /*
6     Defines the basic matrix operations for the BAIJ (compressed row)
7   matrix storage format.
8 */
9 #include "src/mat/impls/baij/seq/baij.h"
10 #include "src/vec/vecimpl.h"
11 #include "src/inline/spops.h"
12 #include "petsc.h"
13 
14 
15 /*
16      Adds diagonal pointers to sparse matrix structure.
17 */
18 
19 int MatMarkDiag_SeqBAIJ(Mat A)
20 {
21   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
22   int         i,j, *diag, m = a->mbs;
23 
24   diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag);
25   PLogObjectMemory(A,(m+1)*sizeof(int));
26   for ( i=0; i<m; i++ ) {
27     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
28       if (a->j[j] == i) {
29         diag[i] = j;
30         break;
31       }
32     }
33   }
34   a->diag = diag;
35   return 0;
36 }
37 
38 #include "draw.h"
39 #include "pinclude/pviewer.h"
40 #include "sys.h"
41 
42 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
43 
44 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
45                             PetscTruth *done)
46 {
47   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
48   int         ierr, n = a->mbs,i;
49 
50   *nn = n;
51   if (!ia) return 0;
52   if (symmetric) {
53     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
54   } else if (oshift == 1) {
55     /* temporarily add 1 to i and j indices */
56     int nz = a->i[n] + 1;
57     for ( i=0; i<nz; i++ ) a->j[i]++;
58     for ( i=0; i<n+1; i++ ) a->i[i]++;
59     *ia = a->i; *ja = a->j;
60   } else {
61     *ia = a->i; *ja = a->j;
62   }
63 
64   return 0;
65 }
66 
67 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,
68                                 PetscTruth *done)
69 {
70   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
71   int         i,n = a->mbs;
72 
73   if (!ia) return 0;
74   if (symmetric) {
75     PetscFree(*ia);
76     PetscFree(*ja);
77   } else if (oshift == 1) {
78     int nz = a->i[n];
79     for ( i=0; i<nz; i++ ) a->j[i]--;
80     for ( i=0; i<n+1; i++ ) a->i[i]--;
81   }
82   return 0;
83 }
84 
85 
86 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer)
87 {
88   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
89   int         i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
90   Scalar      *aa;
91 
92   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
93   col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens);
94   col_lens[0] = MAT_COOKIE;
95 
96   col_lens[1] = a->m;
97   col_lens[2] = a->n;
98   col_lens[3] = a->nz*bs2;
99 
100   /* store lengths of each row and write (including header) to file */
101   count = 0;
102   for ( i=0; i<a->mbs; i++ ) {
103     for ( j=0; j<bs; j++ ) {
104       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
105     }
106   }
107   ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr);
108   PetscFree(col_lens);
109 
110   /* store column indices (zero start index) */
111   jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj);
112   count = 0;
113   for ( i=0; i<a->mbs; i++ ) {
114     for ( j=0; j<bs; j++ ) {
115       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
116         for ( l=0; l<bs; l++ ) {
117           jj[count++] = bs*a->j[k] + l;
118         }
119       }
120     }
121   }
122   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr);
123   PetscFree(jj);
124 
125   /* store nonzero values */
126   aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa);
127   count = 0;
128   for ( i=0; i<a->mbs; i++ ) {
129     for ( j=0; j<bs; j++ ) {
130       for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
131         for ( l=0; l<bs; l++ ) {
132           aa[count++] = a->a[bs2*k + l*bs + j];
133         }
134       }
135     }
136   }
137   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr);
138   PetscFree(aa);
139   return 0;
140 }
141 
142 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer)
143 {
144   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
145   int         ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2;
146   FILE        *fd;
147   char        *outputname;
148 
149   ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
150   ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr);
151   ierr = ViewerGetFormat(viewer,&format);
152   if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) {
153     fprintf(fd,"  block size is %d\n",bs);
154   }
155   else if (format == VIEWER_FORMAT_ASCII_MATLAB) {
156     SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported");
157   }
158   else if (format == VIEWER_FORMAT_ASCII_COMMON) {
159     for ( i=0; i<a->mbs; i++ ) {
160       for ( j=0; j<bs; j++ ) {
161         fprintf(fd,"row %d:",i*bs+j);
162         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
163           for ( l=0; l<bs; l++ ) {
164 #if defined(PETSC_COMPLEX)
165           if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0)
166             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
167               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
168           else if (real(a->a[bs2*k + l*bs + j]) != 0.0)
169             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
170 #else
171           if (a->a[bs2*k + l*bs + j] != 0.0)
172             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
173 #endif
174           }
175         }
176         fprintf(fd,"\n");
177       }
178     }
179   }
180   else {
181     for ( i=0; i<a->mbs; i++ ) {
182       for ( j=0; j<bs; j++ ) {
183         fprintf(fd,"row %d:",i*bs+j);
184         for ( k=a->i[i]; k<a->i[i+1]; k++ ) {
185           for ( l=0; l<bs; l++ ) {
186 #if defined(PETSC_COMPLEX)
187           if (imag(a->a[bs2*k + l*bs + j]) != 0.0) {
188             fprintf(fd," %d %g + %g i",bs*a->j[k]+l,
189               real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j]));
190           }
191           else {
192             fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j]));
193           }
194 #else
195             fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
196 #endif
197           }
198         }
199         fprintf(fd,"\n");
200       }
201     }
202   }
203   fflush(fd);
204   return 0;
205 }
206 
207 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer)
208 {
209   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ *) A->data;
210   int          row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2;
211   double       xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r;
212   Scalar       *aa;
213   Draw         draw;
214   DrawButton   button;
215   PetscTruth   isnull;
216 
217   ViewerDrawGetDraw(viewer,&draw);
218   ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0;
219 
220   xr  = a->n; yr = a->m; h = yr/10.0; w = xr/10.0;
221   xr += w;    yr += h;  xl = -w;     yl = -h;
222   ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
223   /* loop over matrix elements drawing boxes */
224   color = DRAW_BLUE;
225   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
226     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
227       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
228       x_l = a->j[j]*bs; x_r = x_l + 1.0;
229       aa = a->a + j*bs2;
230       for ( k=0; k<bs; k++ ) {
231         for ( l=0; l<bs; l++ ) {
232           if (PetscReal(*aa++) >=  0.) continue;
233           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
234         }
235       }
236     }
237   }
238   color = DRAW_CYAN;
239   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
240     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
241       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
242       x_l = a->j[j]*bs; x_r = x_l + 1.0;
243       aa = a->a + j*bs2;
244       for ( k=0; k<bs; k++ ) {
245         for ( l=0; l<bs; l++ ) {
246           if (PetscReal(*aa++) != 0.) continue;
247           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
248         }
249       }
250     }
251   }
252 
253   color = DRAW_RED;
254   for ( i=0,row=0; i<mbs; i++,row+=bs ) {
255     for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
256       y_l = a->m - row - 1.0; y_r = y_l + 1.0;
257       x_l = a->j[j]*bs; x_r = x_l + 1.0;
258       aa = a->a + j*bs2;
259       for ( k=0; k<bs; k++ ) {
260         for ( l=0; l<bs; l++ ) {
261           if (PetscReal(*aa++) <= 0.) continue;
262           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
263         }
264       }
265     }
266   }
267 
268   DrawFlush(draw);
269   DrawGetPause(draw,&pause);
270   if (pause >= 0) { PetscSleep(pause); return 0;}
271 
272   /* allow the matrix to zoom or shrink */
273   ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
274   while (button != BUTTON_RIGHT) {
275     DrawClear(draw);
276     if (button == BUTTON_LEFT) scale = .5;
277     else if (button == BUTTON_CENTER) scale = 2.;
278     xl = scale*(xl + w - xc) + xc - w*scale;
279     xr = scale*(xr - w - xc) + xc + w*scale;
280     yl = scale*(yl + h - yc) + yc - h*scale;
281     yr = scale*(yr - h - yc) + yc + h*scale;
282     w *= scale; h *= scale;
283     ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr);
284     color = DRAW_BLUE;
285     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
286       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
287         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
288         x_l = a->j[j]*bs; x_r = x_l + 1.0;
289         aa = a->a + j*bs2;
290         for ( k=0; k<bs; k++ ) {
291           for ( l=0; l<bs; l++ ) {
292             if (PetscReal(*aa++) >=  0.) continue;
293             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
294           }
295         }
296       }
297     }
298     color = DRAW_CYAN;
299     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
300       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
301         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
302         x_l = a->j[j]*bs; x_r = x_l + 1.0;
303         aa = a->a + j*bs2;
304         for ( k=0; k<bs; k++ ) {
305           for ( l=0; l<bs; l++ ) {
306           if (PetscReal(*aa++) != 0.) continue;
307           DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
308           }
309         }
310       }
311     }
312 
313     color = DRAW_RED;
314     for ( i=0,row=0; i<mbs; i++,row+=bs ) {
315       for ( j=a->i[i]; j<a->i[i+1]; j++ ) {
316         y_l = a->m - row - 1.0; y_r = y_l + 1.0;
317         x_l = a->j[j]*bs; x_r = x_l + 1.0;
318         aa = a->a + j*bs2;
319         for ( k=0; k<bs; k++ ) {
320           for ( l=0; l<bs; l++ ) {
321             if (PetscReal(*aa++) <= 0.) continue;
322             DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color);
323           }
324         }
325       }
326     }
327     ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0);
328   }
329   return 0;
330 }
331 
332 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer)
333 {
334   Mat         A = (Mat) obj;
335   ViewerType  vtype;
336   int         ierr;
337 
338   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
339   if (vtype == MATLAB_VIEWER) {
340     SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported");
341   }
342   else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){
343     return MatView_SeqBAIJ_ASCII(A,viewer);
344   }
345   else if (vtype == BINARY_FILE_VIEWER) {
346     return MatView_SeqBAIJ_Binary(A,viewer);
347   }
348   else if (vtype == DRAW_VIEWER) {
349     return MatView_SeqBAIJ_Draw(A,viewer);
350   }
351   return 0;
352 }
353 
354 #define CHUNKSIZE  10
355 
356 /* This version has row oriented v  */
357 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is)
358 {
359   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
360   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
361   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented;
362   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
363   int          ridx,cidx,bs2=a->bs2;
364   Scalar      *ap,value,*aa=a->a,*bap;
365 
366   for ( k=0; k<m; k++ ) { /* loop over added rows */
367     row  = im[k]; brow = row/bs;
368 #if defined(PETSC_BOPT_g)
369     if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row");
370     if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large");
371 #endif
372     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
373     rmax = imax[brow]; nrow = ailen[brow];
374     low = 0;
375     for ( l=0; l<n; l++ ) { /* loop over added columns */
376 #if defined(PETSC_BOPT_g)
377       if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column");
378       if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large");
379 #endif
380       col = in[l]; bcol = col/bs;
381       ridx = row % bs; cidx = col % bs;
382       if (roworiented) {
383         value = *v++;
384       }
385       else {
386         value = v[k + l*m];
387       }
388       if (!sorted) low = 0; high = nrow;
389       while (high-low > 5) {
390         t = (low+high)/2;
391         if (rp[t] > bcol) high = t;
392         else              low  = t;
393       }
394       for ( i=low; i<high; i++ ) {
395         if (rp[i] > bcol) break;
396         if (rp[i] == bcol) {
397           bap  = ap +  bs2*i + bs*cidx + ridx;
398           if (is == ADD_VALUES) *bap += value;
399           else                  *bap  = value;
400           goto noinsert;
401         }
402       }
403       if (nonew) goto noinsert;
404       if (nrow >= rmax) {
405         /* there is no extra room in row, therefore enlarge */
406         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
407         Scalar *new_a;
408 
409         /* malloc new storage space */
410         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int);
411         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a);
412         new_j   = (int *) (new_a + bs2*new_nz);
413         new_i   = new_j + new_nz;
414 
415         /* copy over old data into new slots */
416         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];}
417         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;}
418         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
419         len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
420         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,
421                                                            len*sizeof(int));
422         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));
423         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));
424         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
425                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));
426         /* free up old matrix storage */
427         PetscFree(a->a);
428         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);}
429         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
430         a->singlemalloc = 1;
431 
432         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
433         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
434         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar)));
435         a->maxnz += bs2*CHUNKSIZE;
436         a->reallocs++;
437         a->nz++;
438       }
439       N = nrow++ - 1;
440       /* shift up all the later entries in this row */
441       for ( ii=N; ii>=i; ii-- ) {
442         rp[ii+1] = rp[ii];
443         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));
444       }
445       if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar));
446       rp[i]                      = bcol;
447       ap[bs2*i + bs*cidx + ridx] = value;
448       noinsert:;
449       low = i;
450     }
451     ailen[brow] = nrow;
452   }
453   return 0;
454 }
455 
456 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n)
457 {
458   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
459   *m = a->m; *n = a->n;
460   return 0;
461 }
462 
463 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n)
464 {
465   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
466   *m = 0; *n = a->m;
467   return 0;
468 }
469 
470 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
471 {
472   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
473   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
474   Scalar      *aa,*v_i,*aa_i;
475 
476   bs  = a->bs;
477   ai  = a->i;
478   aj  = a->j;
479   aa  = a->a;
480   bs2 = a->bs2;
481 
482   if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range");
483 
484   bn  = row/bs;   /* Block number */
485   bp  = row % bs; /* Block Position */
486   M   = ai[bn+1] - ai[bn];
487   *nz = bs*M;
488 
489   if (v) {
490     *v = 0;
491     if (*nz) {
492       *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v);
493       for ( i=0; i<M; i++ ) { /* for each block in the block row */
494         v_i  = *v + i*bs;
495         aa_i = aa + bs2*(ai[bn] + i);
496         for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];}
497       }
498     }
499   }
500 
501   if (idx) {
502     *idx = 0;
503     if (*nz) {
504       *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx);
505       for ( i=0; i<M; i++ ) { /* for each block in the block row */
506         idx_i = *idx + i*bs;
507         itmp  = bs*aj[ai[bn] + i];
508         for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;}
509       }
510     }
511   }
512   return 0;
513 }
514 
515 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v)
516 {
517   if (idx) {if (*idx) PetscFree(*idx);}
518   if (v)   {if (*v)   PetscFree(*v);}
519   return 0;
520 }
521 
522 static int MatTranspose_SeqBAIJ(Mat A,Mat *B)
523 {
524   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
525   Mat         C;
526   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
527   int         *rows,*cols,bs2=a->bs2;
528   Scalar      *array=a->a;
529 
530   if (B==PETSC_NULL && mbs!=nbs)
531     SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place");
532   col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col);
533   PetscMemzero(col,(1+nbs)*sizeof(int));
534 
535   for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1;
536   ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr);
537   PetscFree(col);
538   rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows);
539   cols = rows + bs;
540   for ( i=0; i<mbs; i++ ) {
541     cols[0] = i*bs;
542     for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1;
543     len = ai[i+1] - ai[i];
544     for ( j=0; j<len; j++ ) {
545       rows[0] = (*aj++)*bs;
546       for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1;
547       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr);
548       array += bs2;
549     }
550   }
551   PetscFree(rows);
552 
553   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
554   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
555 
556   if (B != PETSC_NULL) {
557     *B = C;
558   } else {
559     /* This isn't really an in-place transpose */
560     PetscFree(a->a);
561     if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);}
562     if (a->diag) PetscFree(a->diag);
563     if (a->ilen) PetscFree(a->ilen);
564     if (a->imax) PetscFree(a->imax);
565     if (a->solve_work) PetscFree(a->solve_work);
566     PetscFree(a);
567     PetscMemcpy(A,C,sizeof(struct _Mat));
568     PetscHeaderDestroy(C);
569   }
570   return 0;
571 }
572 
573 
574 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
575 {
576   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
577   int        fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax;
578   int        m = a->m,*ip, N, *ailen = a->ilen;
579   int        mbs = a->mbs, bs2 = a->bs2;
580   Scalar     *aa = a->a, *ap;
581 
582   if (mode == MAT_FLUSH_ASSEMBLY) return 0;
583 
584   for ( i=1; i<mbs; i++ ) {
585     /* move each row back by the amount of empty slots (fshift) before it*/
586     fshift += imax[i-1] - ailen[i-1];
587     if (fshift) {
588       ip = aj + ai[i]; ap = aa + bs2*ai[i];
589       N = ailen[i];
590       for ( j=0; j<N; j++ ) {
591         ip[j-fshift] = ip[j];
592         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar));
593       }
594     }
595     ai[i] = ai[i-1] + ailen[i-1];
596   }
597   if (mbs) {
598     fshift += imax[mbs-1] - ailen[mbs-1];
599     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
600   }
601   /* reset ilen and imax for each row */
602   for ( i=0; i<mbs; i++ ) {
603     ailen[i] = imax[i] = ai[i+1] - ai[i];
604   }
605   a->nz = ai[mbs];
606 
607   /* diagonals may have moved, so kill the diagonal pointers */
608   if (fshift && a->diag) {
609     PetscFree(a->diag);
610     PLogObjectMemory(A,-(m+1)*sizeof(int));
611     a->diag = 0;
612   }
613   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneed storage space %d used %d, rows %d, block size %d\n",
614            fshift*bs2,a->nz*bs2,m,a->bs);
615   PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n",
616            a->reallocs);
617   A->info.nz_unneeded  = (double)fshift*bs2;
618 
619   return 0;
620 }
621 
622 static int MatZeroEntries_SeqBAIJ(Mat A)
623 {
624   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
625   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar));
626   return 0;
627 }
628 
629 int MatDestroy_SeqBAIJ(PetscObject obj)
630 {
631   Mat         A  = (Mat) obj;
632   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
633   int         ierr;
634 
635 #if defined(PETSC_LOG)
636   PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz);
637 #endif
638   PetscFree(a->a);
639   if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);}
640   if (a->diag) PetscFree(a->diag);
641   if (a->ilen) PetscFree(a->ilen);
642   if (a->imax) PetscFree(a->imax);
643   if (a->solve_work) PetscFree(a->solve_work);
644   if (a->mult_work) PetscFree(a->mult_work);
645   PetscFree(a);
646   if (A->mapping) {
647     ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr);
648   }
649   PLogObjectDestroy(A);
650   PetscHeaderDestroy(A);
651   return 0;
652 }
653 
654 static int MatSetOption_SeqBAIJ(Mat A,MatOption op)
655 {
656   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
657   if      (op == MAT_ROW_ORIENTED)              a->roworiented = 1;
658   else if (op == MAT_COLUMN_ORIENTED)           a->roworiented = 0;
659   else if (op == MAT_COLUMNS_SORTED)            a->sorted      = 1;
660   else if (op == MAT_NO_NEW_NONZERO_LOCATIONS)  a->nonew       = 1;
661   else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew       = 0;
662   else if (op == MAT_ROWS_SORTED ||
663            op == MAT_SYMMETRIC ||
664            op == MAT_STRUCTURALLY_SYMMETRIC ||
665            op == MAT_YES_NEW_DIAGONALS ||
666            op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES)
667     PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n");
668   else if (op == MAT_NO_NEW_DIAGONALS)
669     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");}
670   else
671     {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");}
672   return 0;
673 }
674 
675 
676 /* -------------------------------------------------------*/
677 /* Should check that shapes of vectors and matrices match */
678 /* -------------------------------------------------------*/
679 #include "pinclude/plapack.h"
680 
681 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
682 {
683   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
684   register Scalar *x,*z,*v,sum;
685   int             mbs=a->mbs,i,*idx,*ii,n;
686 
687   VecGetArray_Fast(xx,x);
688   VecGetArray_Fast(zz,z);
689 
690   idx   = a->j;
691   v     = a->a;
692   ii    = a->i;
693 
694   for ( i=0; i<mbs; i++ ) {
695     n    = ii[1] - ii[0]; ii++;
696     sum  = 0.0;
697     while (n--) sum += *v++ * x[*idx++];
698     z[i] = sum;
699   }
700   VecRestoreArray_Fast(xx,x);
701   VecRestoreArray_Fast(zz,z);
702   PLogFlops(2*a->nz - a->m);
703   return 0;
704 }
705 
706 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
707 {
708   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
709   register Scalar *x,*z,*v,*xb,sum1,sum2;
710   register Scalar x1,x2;
711   int             mbs=a->mbs,i,*idx,*ii;
712   int             j,n;
713 
714   VecGetArray_Fast(xx,x);
715   VecGetArray_Fast(zz,z);
716 
717   idx   = a->j;
718   v     = a->a;
719   ii    = a->i;
720 
721   for ( i=0; i<mbs; i++ ) {
722     n  = ii[1] - ii[0]; ii++;
723     sum1 = 0.0; sum2 = 0.0;
724     for ( j=0; j<n; j++ ) {
725       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
726       sum1 += v[0]*x1 + v[2]*x2;
727       sum2 += v[1]*x1 + v[3]*x2;
728       v += 4;
729     }
730     z[0] = sum1; z[1] = sum2;
731     z += 2;
732   }
733   VecRestoreArray_Fast(xx,x);
734   VecRestoreArray_Fast(zz,z);
735   PLogFlops(4*a->nz - a->m);
736   return 0;
737 }
738 
739 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
740 {
741   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
742   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
743   int             mbs=a->mbs,i,*idx,*ii,j,n;
744 
745   VecGetArray_Fast(xx,x);
746   VecGetArray_Fast(zz,z);
747 
748   idx   = a->j;
749   v     = a->a;
750   ii    = a->i;
751 
752   for ( i=0; i<mbs; i++ ) {
753     n  = ii[1] - ii[0]; ii++;
754     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
755     for ( j=0; j<n; j++ ) {
756       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
757       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
758       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
759       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
760       v += 9;
761     }
762     z[0] = sum1; z[1] = sum2; z[2] = sum3;
763     z += 3;
764   }
765   VecRestoreArray_Fast(xx,x);
766   VecRestoreArray_Fast(zz,z);
767   PLogFlops(18*a->nz - a->m);
768   return 0;
769 }
770 
771 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
772 {
773   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
774   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4;
775   register Scalar x1,x2,x3,x4;
776   int             mbs=a->mbs,i,*idx,*ii;
777   int             j,n;
778 
779   VecGetArray_Fast(xx,x);
780   VecGetArray_Fast(zz,z);
781 
782   idx   = a->j;
783   v     = a->a;
784   ii    = a->i;
785 
786   for ( i=0; i<mbs; i++ ) {
787     n  = ii[1] - ii[0]; ii++;
788     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
789     for ( j=0; j<n; j++ ) {
790       xb = x + 4*(*idx++);
791       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
792       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
793       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
794       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
795       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
796       v += 16;
797     }
798     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
799     z += 4;
800   }
801   VecRestoreArray_Fast(xx,x);
802   VecRestoreArray_Fast(zz,z);
803   PLogFlops(32*a->nz - a->m);
804   return 0;
805 }
806 
807 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
808 {
809   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
810   register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
811   register Scalar x1,x2,x3,x4,x5;
812   int             mbs=a->mbs,i,*idx,*ii,j,n;
813 
814   VecGetArray_Fast(xx,x);
815   VecGetArray_Fast(zz,z);
816 
817   idx   = a->j;
818   v     = a->a;
819   ii    = a->i;
820 
821   for ( i=0; i<mbs; i++ ) {
822     n  = ii[1] - ii[0]; ii++;
823     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
824     for ( j=0; j<n; j++ ) {
825       xb = x + 5*(*idx++);
826       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
827       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
828       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
829       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
830       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
831       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
832       v += 25;
833     }
834     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
835     z += 5;
836   }
837   VecRestoreArray_Fast(xx,x);
838   VecRestoreArray_Fast(zz,z);
839   PLogFlops(50*a->nz - a->m);
840   return 0;
841 }
842 
843 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
844 {
845   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
846   register Scalar *x,*z,*v,*xb;
847   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
848   int             _One = 1,ncols,k;
849   Scalar          _DOne = 1.0,*work,*workt,_DZero = 0.0;
850 
851   VecGetArray_Fast(xx,x);
852   VecGetArray_Fast(zz,z);
853 
854   idx   = a->j;
855   v     = a->a;
856   ii    = a->i;
857 
858 
859   if (!a->mult_work) {
860     k = PetscMax(a->m,a->n);
861     a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work);
862   }
863   work = a->mult_work;
864   for ( i=0; i<mbs; i++ ) {
865     n     = ii[1] - ii[0]; ii++;
866     ncols = n*bs;
867     workt = work;
868     for ( j=0; j<n; j++ ) {
869       xb = x + bs*(*idx++);
870       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
871       workt += bs;
872     }
873     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One);
874     v += n*bs2;
875     z += bs;
876   }
877   VecRestoreArray_Fast(xx,x);
878   VecRestoreArray_Fast(zz,z);
879   PLogFlops(2*a->nz*bs2 - a->m);
880   return 0;
881 }
882 
883 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
884 {
885   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
886   register Scalar *x,*y,*z,*v,sum;
887   int             mbs=a->mbs,i,*idx,*ii,n;
888 
889   VecGetArray_Fast(xx,x);
890   VecGetArray_Fast(yy,y);
891   VecGetArray_Fast(zz,z);
892 
893   idx   = a->j;
894   v     = a->a;
895   ii    = a->i;
896 
897   for ( i=0; i<mbs; i++ ) {
898     n    = ii[1] - ii[0]; ii++;
899     sum  = y[i];
900     while (n--) sum += *v++ * x[*idx++];
901     z[i] = sum;
902   }
903   VecRestoreArray_Fast(xx,x);
904   VecRestoreArray_Fast(yy,y);
905   VecRestoreArray_Fast(zz,z);
906   PLogFlops(2*a->nz);
907   return 0;
908 }
909 
910 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
911 {
912   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
913   register Scalar *x,*y,*z,*v,*xb,sum1,sum2;
914   register Scalar x1,x2;
915   int             mbs=a->mbs,i,*idx,*ii;
916   int             j,n;
917 
918   VecGetArray_Fast(xx,x);
919   VecGetArray_Fast(yy,y);
920   VecGetArray_Fast(zz,z);
921 
922   idx   = a->j;
923   v     = a->a;
924   ii    = a->i;
925 
926   for ( i=0; i<mbs; i++ ) {
927     n  = ii[1] - ii[0]; ii++;
928     sum1 = y[0]; sum2 = y[1];
929     for ( j=0; j<n; j++ ) {
930       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
931       sum1 += v[0]*x1 + v[2]*x2;
932       sum2 += v[1]*x1 + v[3]*x2;
933       v += 4;
934     }
935     z[0] = sum1; z[1] = sum2;
936     z += 2; y += 2;
937   }
938   VecRestoreArray_Fast(xx,x);
939   VecRestoreArray_Fast(yy,y);
940   VecRestoreArray_Fast(zz,z);
941   PLogFlops(4*a->nz);
942   return 0;
943 }
944 
945 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
946 {
947   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
948   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3;
949   int             mbs=a->mbs,i,*idx,*ii,j,n;
950 
951   VecGetArray_Fast(xx,x);
952   VecGetArray_Fast(yy,y);
953   VecGetArray_Fast(zz,z);
954 
955   idx   = a->j;
956   v     = a->a;
957   ii    = a->i;
958 
959   for ( i=0; i<mbs; i++ ) {
960     n  = ii[1] - ii[0]; ii++;
961     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
962     for ( j=0; j<n; j++ ) {
963       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
964       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
965       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
966       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
967       v += 9;
968     }
969     z[0] = sum1; z[1] = sum2; z[2] = sum3;
970     z += 3; y += 3;
971   }
972   VecRestoreArray_Fast(xx,x);
973   VecRestoreArray_Fast(yy,y);
974   VecRestoreArray_Fast(zz,z);
975   PLogFlops(18*a->nz);
976   return 0;
977 }
978 
979 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
980 {
981   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
982   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4;
983   register Scalar x1,x2,x3,x4;
984   int             mbs=a->mbs,i,*idx,*ii;
985   int             j,n;
986 
987   VecGetArray_Fast(xx,x);
988   VecGetArray_Fast(yy,y);
989   VecGetArray_Fast(zz,z);
990 
991   idx   = a->j;
992   v     = a->a;
993   ii    = a->i;
994 
995   for ( i=0; i<mbs; i++ ) {
996     n  = ii[1] - ii[0]; ii++;
997     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
998     for ( j=0; j<n; j++ ) {
999       xb = x + 4*(*idx++);
1000       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1001       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1002       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1003       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1004       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1005       v += 16;
1006     }
1007     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1008     z += 4; y += 4;
1009   }
1010   VecRestoreArray_Fast(xx,x);
1011   VecRestoreArray_Fast(yy,y);
1012   VecRestoreArray_Fast(zz,z);
1013   PLogFlops(32*a->nz);
1014   return 0;
1015 }
1016 
1017 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1018 {
1019   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1020   register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5;
1021   register Scalar x1,x2,x3,x4,x5;
1022   int             mbs=a->mbs,i,*idx,*ii,j,n;
1023 
1024   VecGetArray_Fast(xx,x);
1025   VecGetArray_Fast(yy,y);
1026   VecGetArray_Fast(zz,z);
1027 
1028   idx   = a->j;
1029   v     = a->a;
1030   ii    = a->i;
1031 
1032   for ( i=0; i<mbs; i++ ) {
1033     n  = ii[1] - ii[0]; ii++;
1034     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1035     for ( j=0; j<n; j++ ) {
1036       xb = x + 5*(*idx++);
1037       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1038       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1039       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1040       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1041       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1042       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1043       v += 25;
1044     }
1045     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1046     z += 5; y += 5;
1047   }
1048   VecRestoreArray_Fast(xx,x);
1049   VecRestoreArray_Fast(yy,y);
1050   VecRestoreArray_Fast(zz,z);
1051   PLogFlops(50*a->nz);
1052   return 0;
1053 }
1054 
1055 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1056 {
1057   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1058   register Scalar *x,*z,*v,*xb;
1059   int             mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
1060   int             _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1061 
1062   if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1063 
1064   VecGetArray_Fast(xx,x);
1065   VecGetArray_Fast(zz,z);
1066 
1067   idx   = a->j;
1068   v     = a->a;
1069   ii    = a->i;
1070 
1071 
1072   if (!a->mult_work) {
1073     k = PetscMax(a->m,a->n);
1074     a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work);
1075   }
1076   work = a->mult_work;
1077   for ( i=0; i<mbs; i++ ) {
1078     n     = ii[1] - ii[0]; ii++;
1079     ncols = n*bs;
1080     workt = work;
1081     for ( j=0; j<n; j++ ) {
1082       xb = x + bs*(*idx++);
1083       for ( k=0; k<bs; k++ ) workt[k] = xb[k];
1084       workt += bs;
1085     }
1086     LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One);
1087     v += n*bs2;
1088     z += bs;
1089   }
1090   VecRestoreArray_Fast(xx,x);
1091   VecRestoreArray_Fast(zz,z);
1092   PLogFlops(2*a->nz*bs2 );
1093   return 0;
1094 }
1095 
1096 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz)
1097 {
1098   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1099   Scalar          *xg,*zg,*zb;
1100   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1101   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1102   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1103 
1104 
1105   VecGetArray_Fast(xx,xg); x = xg;
1106   VecGetArray_Fast(zz,zg); z = zg;
1107   PetscMemzero(z,N*sizeof(Scalar));
1108 
1109   idx   = a->j;
1110   v     = a->a;
1111   ii    = a->i;
1112 
1113   switch (bs) {
1114   case 1:
1115     for ( i=0; i<mbs; i++ ) {
1116       n  = ii[1] - ii[0]; ii++;
1117       xb = x + i; x1 = xb[0];
1118       ib = idx + ai[i];
1119       for ( j=0; j<n; j++ ) {
1120         rval    = ib[j];
1121         z[rval] += *v++ * x1;
1122       }
1123     }
1124     break;
1125   case 2:
1126     for ( i=0; i<mbs; i++ ) {
1127       n  = ii[1] - ii[0]; ii++;
1128       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1129       ib = idx + ai[i];
1130       for ( j=0; j<n; j++ ) {
1131         rval      = ib[j]*2;
1132         z[rval++] += v[0]*x1 + v[1]*x2;
1133         z[rval++] += v[2]*x1 + v[3]*x2;
1134         v += 4;
1135       }
1136     }
1137     break;
1138   case 3:
1139     for ( i=0; i<mbs; i++ ) {
1140       n  = ii[1] - ii[0]; ii++;
1141       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1142       ib = idx + ai[i];
1143       for ( j=0; j<n; j++ ) {
1144         rval      = ib[j]*3;
1145         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1146         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1147         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1148         v += 9;
1149       }
1150     }
1151     break;
1152   case 4:
1153     for ( i=0; i<mbs; i++ ) {
1154       n  = ii[1] - ii[0]; ii++;
1155       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1156       ib = idx + ai[i];
1157       for ( j=0; j<n; j++ ) {
1158         rval      = ib[j]*4;
1159         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1160         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1161         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1162         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1163         v += 16;
1164       }
1165     }
1166     break;
1167   case 5:
1168     for ( i=0; i<mbs; i++ ) {
1169       n  = ii[1] - ii[0]; ii++;
1170       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1171       x4 = xb[3];   x5 = xb[4];
1172       ib = idx + ai[i];
1173       for ( j=0; j<n; j++ ) {
1174         rval      = ib[j]*5;
1175         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1176         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1177         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1178         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1179         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1180         v += 25;
1181       }
1182     }
1183     break;
1184       /* block sizes larger then 5 by 5 are handled by BLAS */
1185     default: {
1186       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1187       if (!a->mult_work) {
1188         k = PetscMax(a->m,a->n);
1189         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1190         CHKPTRQ(a->mult_work);
1191       }
1192       work = a->mult_work;
1193       for ( i=0; i<mbs; i++ ) {
1194         n     = ii[1] - ii[0]; ii++;
1195         ncols = n*bs;
1196         PetscMemzero(work,ncols*sizeof(Scalar));
1197         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1198         v += n*bs2;
1199         x += bs;
1200         workt = work;
1201         for ( j=0; j<n; j++ ) {
1202           zb = z + bs*(*idx++);
1203           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1204           workt += bs;
1205         }
1206       }
1207     }
1208   }
1209   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1210   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1211   PLogFlops(2*a->nz*a->bs2 - a->n);
1212   return 0;
1213 }
1214 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1215 {
1216   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *) A->data;
1217   Scalar          *xg,*zg,*zb;
1218   register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5;
1219   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n;
1220   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1221 
1222 
1223 
1224   VecGetArray_Fast(xx,xg); x = xg;
1225   VecGetArray_Fast(zz,zg); z = zg;
1226 
1227   if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); }
1228   else PetscMemzero(z,N*sizeof(Scalar));
1229 
1230 
1231   idx   = a->j;
1232   v     = a->a;
1233   ii    = a->i;
1234 
1235   switch (bs) {
1236   case 1:
1237     for ( i=0; i<mbs; i++ ) {
1238       n  = ii[1] - ii[0]; ii++;
1239       xb = x + i; x1 = xb[0];
1240       ib = idx + ai[i];
1241       for ( j=0; j<n; j++ ) {
1242         rval    = ib[j];
1243         z[rval] += *v++ * x1;
1244       }
1245     }
1246     break;
1247   case 2:
1248     for ( i=0; i<mbs; i++ ) {
1249       n  = ii[1] - ii[0]; ii++;
1250       xb = x + 2*i; x1 = xb[0]; x2 = xb[1];
1251       ib = idx + ai[i];
1252       for ( j=0; j<n; j++ ) {
1253         rval      = ib[j]*2;
1254         z[rval++] += v[0]*x1 + v[1]*x2;
1255         z[rval++] += v[2]*x1 + v[3]*x2;
1256         v += 4;
1257       }
1258     }
1259     break;
1260   case 3:
1261     for ( i=0; i<mbs; i++ ) {
1262       n  = ii[1] - ii[0]; ii++;
1263       xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1264       ib = idx + ai[i];
1265       for ( j=0; j<n; j++ ) {
1266         rval      = ib[j]*3;
1267         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1268         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1269         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1270         v += 9;
1271       }
1272     }
1273     break;
1274   case 4:
1275     for ( i=0; i<mbs; i++ ) {
1276       n  = ii[1] - ii[0]; ii++;
1277       xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1278       ib = idx + ai[i];
1279       for ( j=0; j<n; j++ ) {
1280         rval      = ib[j]*4;
1281         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1282         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1283         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1284         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1285         v += 16;
1286       }
1287     }
1288     break;
1289   case 5:
1290     for ( i=0; i<mbs; i++ ) {
1291       n  = ii[1] - ii[0]; ii++;
1292       xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1293       x4 = xb[3];   x5 = xb[4];
1294       ib = idx + ai[i];
1295       for ( j=0; j<n; j++ ) {
1296         rval      = ib[j]*5;
1297         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1298         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1299         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1300         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1301         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1302         v += 25;
1303       }
1304     }
1305     break;
1306       /* block sizes larger then 5 by 5 are handled by BLAS */
1307     default: {
1308       int  _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt;
1309       if (!a->mult_work) {
1310         k = PetscMax(a->m,a->n);
1311         a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));
1312         CHKPTRQ(a->mult_work);
1313       }
1314       work = a->mult_work;
1315       for ( i=0; i<mbs; i++ ) {
1316         n     = ii[1] - ii[0]; ii++;
1317         ncols = n*bs;
1318         PetscMemzero(work,ncols*sizeof(Scalar));
1319         LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One);
1320         v += n*bs2;
1321         x += bs;
1322         workt = work;
1323         for ( j=0; j<n; j++ ) {
1324           zb = z + bs*(*idx++);
1325           for ( k=0; k<bs; k++ ) zb[k] += workt[k] ;
1326           workt += bs;
1327         }
1328       }
1329     }
1330   }
1331   ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr);
1332   ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr);
1333   PLogFlops(2*a->nz*a->bs2);
1334   return 0;
1335 }
1336 
1337 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1338 {
1339   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1340 
1341   info->rows_global    = (double)a->m;
1342   info->columns_global = (double)a->n;
1343   info->rows_local     = (double)a->m;
1344   info->columns_local  = (double)a->n;
1345   info->block_size     = a->bs2;
1346   info->nz_allocated   = a->maxnz;
1347   info->nz_used        = a->bs2*a->nz;
1348   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1349   /*  if (info->nz_unneeded != A->info.nz_unneeded)
1350     printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */
1351   info->assemblies   = A->num_ass;
1352   info->mallocs      = a->reallocs;
1353   info->memory       = A->mem;
1354   if (A->factor) {
1355     info->fill_ratio_given  = A->info.fill_ratio_given;
1356     info->fill_ratio_needed = A->info.fill_ratio_needed;
1357     info->factor_mallocs    = A->info.factor_mallocs;
1358   } else {
1359     info->fill_ratio_given  = 0;
1360     info->fill_ratio_needed = 0;
1361     info->factor_mallocs    = 0;
1362   }
1363   return 0;
1364 }
1365 
1366 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg)
1367 {
1368   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1369 
1370   if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type");
1371 
1372   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1373   if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)||
1374       (a->nz != b->nz)) {
1375     *flg = PETSC_FALSE; return 0;
1376   }
1377 
1378   /* if the a->i are the same */
1379   if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) {
1380     *flg = PETSC_FALSE; return 0;
1381   }
1382 
1383   /* if a->j are the same */
1384   if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) {
1385     *flg = PETSC_FALSE; return 0;
1386   }
1387 
1388   /* if a->a are the same */
1389   if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) {
1390     *flg = PETSC_FALSE; return 0;
1391   }
1392   *flg = PETSC_TRUE;
1393   return 0;
1394 
1395 }
1396 
1397 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1398 {
1399   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1400   int         i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1401   Scalar      *x, zero = 0.0,*aa,*aa_j;
1402 
1403   bs   = a->bs;
1404   aa   = a->a;
1405   ai   = a->i;
1406   aj   = a->j;
1407   ambs = a->mbs;
1408   bs2  = a->bs2;
1409 
1410   VecSet(&zero,v);
1411   VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n);
1412   if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector");
1413   for ( i=0; i<ambs; i++ ) {
1414     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1415       if (aj[j] == i) {
1416         row  = i*bs;
1417         aa_j = aa+j*bs2;
1418         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1419         break;
1420       }
1421     }
1422   }
1423   return 0;
1424 }
1425 
1426 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1427 {
1428   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1429   Scalar      *l,*r,x,*v,*aa,*li,*ri;
1430   int         i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1431 
1432   ai  = a->i;
1433   aj  = a->j;
1434   aa  = a->a;
1435   m   = a->m;
1436   n   = a->n;
1437   bs  = a->bs;
1438   mbs = a->mbs;
1439   bs2 = a->bs2;
1440   if (ll) {
1441     VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm);
1442     if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length");
1443     for ( i=0; i<mbs; i++ ) { /* for each block row */
1444       M  = ai[i+1] - ai[i];
1445       li = l + i*bs;
1446       v  = aa + bs2*ai[i];
1447       for ( j=0; j<M; j++ ) { /* for each block */
1448         for ( k=0; k<bs2; k++ ) {
1449           (*v++) *= li[k%bs];
1450         }
1451       }
1452     }
1453   }
1454 
1455   if (rr) {
1456     VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn);
1457     if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length");
1458     for ( i=0; i<mbs; i++ ) { /* for each block row */
1459       M  = ai[i+1] - ai[i];
1460       v  = aa + bs2*ai[i];
1461       for ( j=0; j<M; j++ ) { /* for each block */
1462         ri = r + bs*aj[ai[i]+j];
1463         for ( k=0; k<bs; k++ ) {
1464           x = ri[k];
1465           for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x;
1466         }
1467       }
1468     }
1469   }
1470   return 0;
1471 }
1472 
1473 
1474 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*);
1475 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double);
1476 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int);
1477 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*);
1478 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**);
1479 
1480 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
1481 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
1482 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
1483 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
1484 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
1485 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
1486 
1487 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*);
1488 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*);
1489 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*);
1490 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*);
1491 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*);
1492 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*);
1493 
1494 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm)
1495 {
1496   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1497   Scalar      *v = a->a;
1498   double      sum = 0.0;
1499   int         i,nz=a->nz,bs2=a->bs2;
1500 
1501   if (type == NORM_FROBENIUS) {
1502     for (i=0; i< bs2*nz; i++ ) {
1503 #if defined(PETSC_COMPLEX)
1504       sum += real(conj(*v)*(*v)); v++;
1505 #else
1506       sum += (*v)*(*v); v++;
1507 #endif
1508     }
1509     *norm = sqrt(sum);
1510   }
1511   else {
1512     SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet");
1513   }
1514   return 0;
1515 }
1516 
1517 /*
1518      note: This can only work for identity for row and col. It would
1519    be good to check this and otherwise generate an error.
1520 */
1521 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill)
1522 {
1523   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1524   Mat         outA;
1525   int         ierr;
1526 
1527   if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported");
1528 
1529   outA          = inA;
1530   inA->factor   = FACTOR_LU;
1531   a->row        = row;
1532   a->col        = col;
1533 
1534   a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work);
1535 
1536   if (!a->diag) {
1537     ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr);
1538   }
1539   ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr);
1540   return 0;
1541 }
1542 
1543 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA)
1544 {
1545   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data;
1546   int         one = 1, totalnz = a->bs2*a->nz;
1547   BLscal_( &totalnz, alpha, a->a, &one );
1548   PLogFlops(totalnz);
1549   return 0;
1550 }
1551 
1552 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v)
1553 {
1554   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data;
1555   int        *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
1556   int        *ai = a->i, *ailen = a->ilen;
1557   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1558   Scalar     *ap, *aa = a->a, zero = 0.0;
1559 
1560   for ( k=0; k<m; k++ ) { /* loop over rows */
1561     row  = im[k]; brow = row/bs;
1562     if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row");
1563     if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large");
1564     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1565     nrow = ailen[brow];
1566     for ( l=0; l<n; l++ ) { /* loop over columns */
1567       if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column");
1568       if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large");
1569       col  = in[l] ;
1570       bcol = col/bs;
1571       cidx = col%bs;
1572       ridx = row%bs;
1573       high = nrow;
1574       low  = 0; /* assume unsorted */
1575       while (high-low > 5) {
1576         t = (low+high)/2;
1577         if (rp[t] > bcol) high = t;
1578         else             low  = t;
1579       }
1580       for ( i=low; i<high; i++ ) {
1581         if (rp[i] > bcol) break;
1582         if (rp[i] == bcol) {
1583           *v++ = ap[bs2*i+bs*cidx+ridx];
1584           goto finished;
1585         }
1586       }
1587       *v++ = zero;
1588       finished:;
1589     }
1590   }
1591   return 0;
1592 }
1593 
1594 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs)
1595 {
1596   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data;
1597   *bs = baij->bs;
1598   return 0;
1599 }
1600 
1601 /* idx should be of length atlease bs */
1602 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg)
1603 {
1604   int i,row;
1605   row = idx[0];
1606   if (row%bs!=0) { *flg = PETSC_FALSE; return 0; }
1607 
1608   for ( i=1; i<bs; i++ ) {
1609     if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; }
1610   }
1611   *flg = PETSC_TRUE;
1612   return 0;
1613 }
1614 
1615 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag)
1616 {
1617   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1618   IS          is_local;
1619   int         ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2;
1620   PetscTruth  flg;
1621   Scalar      zero = 0.0,*aa;
1622 
1623   /* Make a copy of the IS and  sort it */
1624   ierr = ISGetSize(is,&is_n);CHKERRQ(ierr);
1625   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1626   ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr);
1627   ierr = ISSort(is_local); CHKERRQ(ierr);
1628   ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr);
1629 
1630   i = 0;
1631   while (i < is_n) {
1632     if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range");
1633     flg = PETSC_FALSE;
1634     if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); }
1635     if (flg) { /* There exists a block of rows to be Zerowed */
1636       baij->ilen[rows[i]/bs] = 0;
1637       i += bs;
1638     } else { /* Zero out only the requested row */
1639       count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs;
1640       aa    = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs);
1641       for ( j=0; j<count; j++ ) {
1642         aa[0] = zero;
1643         aa+=bs;
1644       }
1645       i++;
1646     }
1647   }
1648   if (diag) {
1649     for ( j=0; j<is_n; j++ ) {
1650       ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1651     }
1652   }
1653   ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr);
1654   ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr);
1655   ierr = ISDestroy(is_local); CHKERRQ(ierr);
1656   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1657   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1658 
1659   return 0;
1660 }
1661 int MatPrintHelp_SeqBAIJ(Mat A)
1662 {
1663   static int called = 0;
1664   MPI_Comm   comm = A->comm;
1665 
1666   if (called) return 0; else called = 1;
1667   PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");
1668   PetscPrintf(comm,"  -mat_block_size <block_size>\n");
1669   return 0;
1670 }
1671 
1672 /* -------------------------------------------------------------------*/
1673 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ,
1674        MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ,
1675        MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N,
1676        MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ,
1677        MatSolve_SeqBAIJ_N,0,
1678        0,0,
1679        MatLUFactor_SeqBAIJ,0,
1680        0,
1681        MatTranspose_SeqBAIJ,
1682        MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ,
1683        MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ,
1684        0,MatAssemblyEnd_SeqBAIJ,
1685        0,
1686        MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ,
1687        MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0,
1688        MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ,
1689        MatILUFactorSymbolic_SeqBAIJ,0,
1690        0,0,/*  MatConvert_SeqBAIJ  */ 0,
1691        MatConvertSameType_SeqBAIJ,0,0,
1692        MatILUFactor_SeqBAIJ,0,0,
1693        MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ,
1694        MatGetValues_SeqBAIJ,0,
1695        MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ,
1696        0,0,0,MatGetBlockSize_SeqBAIJ,
1697        MatGetRowIJ_SeqBAIJ,
1698        MatRestoreRowIJ_SeqBAIJ};
1699 
1700 /*@C
1701    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1702    compressed row) format.  For good matrix assembly performance the
1703    user should preallocate the matrix storage by setting the parameter nz
1704    (or the array nzz).  By setting these parameters accurately, performance
1705    during matrix assembly can be increased by more than a factor of 50.
1706 
1707    Input Parameters:
1708 .  comm - MPI communicator, set to MPI_COMM_SELF
1709 .  bs - size of block
1710 .  m - number of rows
1711 .  n - number of columns
1712 .  nz - number of block nonzeros per block row (same for all rows)
1713 .  nzz - array containing the number of block nonzeros in the various block rows
1714          (possibly different for each block row) or PETSC_NULL
1715 
1716    Output Parameter:
1717 .  A - the matrix
1718 
1719    Notes:
1720    The block AIJ format is fully compatible with standard Fortran 77
1721    storage.  That is, the stored row and column indices can begin at
1722    either one (as in Fortran) or zero.  See the users' manual for details.
1723 
1724    Specify the preallocated storage with either nz or nnz (not both).
1725    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1726    allocation.  For additional details, see the users manual chapter on
1727    matrices and the file $(PETSC_DIR)/Performance.
1728 
1729 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
1730 @*/
1731 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A)
1732 {
1733   Mat         B;
1734   Mat_SeqBAIJ *b;
1735   int         i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size;
1736 
1737   MPI_Comm_size(comm,&size);
1738   if (size > 1) SETERRQ(1,"MatCreateSeqBAIJ:Comm must be of size 1");
1739 
1740   if (mbs*bs!=m || nbs*bs!=n)
1741     SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize");
1742 
1743   *A = 0;
1744   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm);
1745   PLogObjectCreate(B);
1746   B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b);
1747   PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1748   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
1749   ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr);
1750   if (!flg) {
1751     switch (bs) {
1752       case 1:
1753         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1754         B->ops.solve           = MatSolve_SeqBAIJ_1;
1755         B->ops.mult            = MatMult_SeqBAIJ_1;
1756         B->ops.multadd         = MatMultAdd_SeqBAIJ_1;
1757        break;
1758       case 2:
1759         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1760         B->ops.solve           = MatSolve_SeqBAIJ_2;
1761         B->ops.mult            = MatMult_SeqBAIJ_2;
1762         B->ops.multadd         = MatMultAdd_SeqBAIJ_2;
1763         break;
1764       case 3:
1765         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
1766         B->ops.solve           = MatSolve_SeqBAIJ_3;
1767         B->ops.mult            = MatMult_SeqBAIJ_3;
1768         B->ops.multadd         = MatMultAdd_SeqBAIJ_3;
1769         break;
1770       case 4:
1771         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
1772         B->ops.solve           = MatSolve_SeqBAIJ_4;
1773         B->ops.mult            = MatMult_SeqBAIJ_4;
1774         B->ops.multadd         = MatMultAdd_SeqBAIJ_4;
1775         break;
1776       case 5:
1777         B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
1778         B->ops.solve           = MatSolve_SeqBAIJ_5;
1779         B->ops.mult            = MatMult_SeqBAIJ_5;
1780         B->ops.multadd         = MatMultAdd_SeqBAIJ_5;
1781         break;
1782     }
1783   }
1784   B->destroy          = MatDestroy_SeqBAIJ;
1785   B->view             = MatView_SeqBAIJ;
1786   B->factor           = 0;
1787   B->lupivotthreshold = 1.0;
1788   B->mapping          = 0;
1789   b->row              = 0;
1790   b->col              = 0;
1791   b->reallocs         = 0;
1792 
1793   b->m       = m; B->m = m; B->M = m;
1794   b->n       = n; B->n = n; B->N = n;
1795   b->mbs     = mbs;
1796   b->nbs     = nbs;
1797   b->imax    = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax);
1798   if (nnz == PETSC_NULL) {
1799     if (nz == PETSC_DEFAULT) nz = 5;
1800     else if (nz <= 0)        nz = 1;
1801     for ( i=0; i<mbs; i++ ) b->imax[i] = nz;
1802     nz = nz*mbs;
1803   }
1804   else {
1805     nz = 0;
1806     for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];}
1807   }
1808 
1809   /* allocate the matrix space */
1810   len   = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int);
1811   b->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a);
1812   PetscMemzero(b->a,nz*bs2*sizeof(Scalar));
1813   b->j  = (int *) (b->a + nz*bs2);
1814   PetscMemzero(b->j,nz*sizeof(int));
1815   b->i  = b->j + nz;
1816   b->singlemalloc = 1;
1817 
1818   b->i[0] = 0;
1819   for (i=1; i<mbs+1; i++) {
1820     b->i[i] = b->i[i-1] + b->imax[i-1];
1821   }
1822 
1823   /* b->ilen will count nonzeros in each block row so far. */
1824   b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int));
1825   PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1826   for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;}
1827 
1828   b->bs               = bs;
1829   b->bs2              = bs2;
1830   b->mbs              = mbs;
1831   b->nz               = 0;
1832   b->maxnz            = nz*bs2;
1833   b->sorted           = 0;
1834   b->roworiented      = 1;
1835   b->nonew            = 0;
1836   b->diag             = 0;
1837   b->solve_work       = 0;
1838   b->mult_work        = 0;
1839   b->spptr            = 0;
1840   B->info.nz_unneeded = (double)b->maxnz;
1841 
1842   *A = B;
1843   ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr);
1844   if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); }
1845   return 0;
1846 }
1847 
1848 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues)
1849 {
1850   Mat         C;
1851   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data;
1852   int         i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
1853 
1854   if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix");
1855 
1856   *B = 0;
1857   PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm);
1858   PLogObjectCreate(C);
1859   C->data       = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c);
1860   PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps));
1861   C->destroy    = MatDestroy_SeqBAIJ;
1862   C->view       = MatView_SeqBAIJ;
1863   C->factor     = A->factor;
1864   c->row        = 0;
1865   c->col        = 0;
1866   C->assembled  = PETSC_TRUE;
1867 
1868   c->m = C->m   = a->m;
1869   c->n = C->n   = a->n;
1870   C->M          = a->m;
1871   C->N          = a->n;
1872 
1873   c->bs         = a->bs;
1874   c->bs2        = a->bs2;
1875   c->mbs        = a->mbs;
1876   c->nbs        = a->nbs;
1877 
1878   c->imax       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax);
1879   c->ilen       = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen);
1880   for ( i=0; i<mbs; i++ ) {
1881     c->imax[i] = a->imax[i];
1882     c->ilen[i] = a->ilen[i];
1883   }
1884 
1885   /* allocate the matrix space */
1886   c->singlemalloc = 1;
1887   len   = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int));
1888   c->a  = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a);
1889   c->j  = (int *) (c->a + nz*bs2);
1890   c->i  = c->j + nz;
1891   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1892   if (mbs > 0) {
1893     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1894     if (cpvalues == COPY_VALUES) {
1895       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar));
1896     }
1897   }
1898 
1899   PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ));
1900   c->sorted      = a->sorted;
1901   c->roworiented = a->roworiented;
1902   c->nonew       = a->nonew;
1903 
1904   if (a->diag) {
1905     c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag);
1906     PLogObjectMemory(C,(mbs+1)*sizeof(int));
1907     for ( i=0; i<mbs; i++ ) {
1908       c->diag[i] = a->diag[i];
1909     }
1910   }
1911   else c->diag          = 0;
1912   c->nz                 = a->nz;
1913   c->maxnz              = a->maxnz;
1914   c->solve_work         = 0;
1915   c->spptr              = 0;      /* Dangerous -I'm throwing away a->spptr */
1916   c->mult_work          = 0;
1917   *B = C;
1918   return 0;
1919 }
1920 
1921 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A)
1922 {
1923   Mat_SeqBAIJ  *a;
1924   Mat          B;
1925   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg;
1926   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1927   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1928   int          *masked, nmask,tmp,bs2,ishift;
1929   Scalar       *aa;
1930   MPI_Comm     comm = ((PetscObject) viewer)->comm;
1931 
1932   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
1933   bs2  = bs*bs;
1934 
1935   MPI_Comm_size(comm,&size);
1936   if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor");
1937   ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
1938   ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr);
1939   if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object");
1940   M = header[1]; N = header[2]; nz = header[3];
1941 
1942   if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices");
1943 
1944   /*
1945      This code adds extra rows to make sure the number of rows is
1946     divisible by the blocksize
1947   */
1948   mbs        = M/bs;
1949   extra_rows = bs - M + bs*(mbs);
1950   if (extra_rows == bs) extra_rows = 0;
1951   else                  mbs++;
1952   if (extra_rows) {
1953     PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
1954   }
1955 
1956   /* read in row lengths */
1957   rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
1958   ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr);
1959   for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
1960 
1961   /* read in column indices */
1962   jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj);
1963   ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr);
1964   for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i;
1965 
1966   /* loop over row lengths determining block row lengths */
1967   browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths);
1968   PetscMemzero(browlengths,mbs*sizeof(int));
1969   mask   = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask);
1970   PetscMemzero(mask,mbs*sizeof(int));
1971   masked = mask + mbs;
1972   rowcount = 0; nzcount = 0;
1973   for ( i=0; i<mbs; i++ ) {
1974     nmask = 0;
1975     for ( j=0; j<bs; j++ ) {
1976       kmax = rowlengths[rowcount];
1977       for ( k=0; k<kmax; k++ ) {
1978         tmp = jj[nzcount++]/bs;
1979         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1980       }
1981       rowcount++;
1982     }
1983     browlengths[i] += nmask;
1984     /* zero out the mask elements we set */
1985     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
1986   }
1987 
1988   /* create our matrix */
1989   ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1990          CHKERRQ(ierr);
1991   B = *A;
1992   a = (Mat_SeqBAIJ *) B->data;
1993 
1994   /* set matrix "i" values */
1995   a->i[0] = 0;
1996   for ( i=1; i<= mbs; i++ ) {
1997     a->i[i]      = a->i[i-1] + browlengths[i-1];
1998     a->ilen[i-1] = browlengths[i-1];
1999   }
2000   a->nz         = 0;
2001   for ( i=0; i<mbs; i++ ) a->nz += browlengths[i];
2002 
2003   /* read in nonzero values */
2004   aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa);
2005   ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr);
2006   for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0;
2007 
2008   /* set "a" and "j" values into matrix */
2009   nzcount = 0; jcount = 0;
2010   for ( i=0; i<mbs; i++ ) {
2011     nzcountb = nzcount;
2012     nmask    = 0;
2013     for ( j=0; j<bs; j++ ) {
2014       kmax = rowlengths[i*bs+j];
2015       for ( k=0; k<kmax; k++ ) {
2016         tmp = jj[nzcount++]/bs;
2017 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2018       }
2019       rowcount++;
2020     }
2021     /* sort the masked values */
2022     PetscSortInt(nmask,masked);
2023 
2024     /* set "j" values into matrix */
2025     maskcount = 1;
2026     for ( j=0; j<nmask; j++ ) {
2027       a->j[jcount++]  = masked[j];
2028       mask[masked[j]] = maskcount++;
2029     }
2030     /* set "a" values into matrix */
2031     ishift = bs2*a->i[i];
2032     for ( j=0; j<bs; j++ ) {
2033       kmax = rowlengths[i*bs+j];
2034       for ( k=0; k<kmax; k++ ) {
2035         tmp    = jj[nzcountb]/bs ;
2036         block  = mask[tmp] - 1;
2037         point  = jj[nzcountb] - bs*tmp;
2038         idx    = ishift + bs2*block + j + bs*point;
2039         a->a[idx] = aa[nzcountb++];
2040       }
2041     }
2042     /* zero out the mask elements we set */
2043     for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0;
2044   }
2045   if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix");
2046 
2047   PetscFree(rowlengths);
2048   PetscFree(browlengths);
2049   PetscFree(aa);
2050   PetscFree(jj);
2051   PetscFree(mask);
2052 
2053   B->assembled = PETSC_TRUE;
2054 
2055   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr);
2056   if (flg) {
2057     Viewer tviewer;
2058     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2059     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr);
2060     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2061     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2062   }
2063   ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr);
2064   if (flg) {
2065     Viewer tviewer;
2066     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2067     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr);
2068     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2069     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2070   }
2071   ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr);
2072   if (flg) {
2073     Viewer tviewer;
2074     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2075     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2076     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2077   }
2078   ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr);
2079   if (flg) {
2080     Viewer tviewer;
2081     ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr);
2082     ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr);
2083     ierr = MatView(B,tviewer); CHKERRQ(ierr);
2084     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2085   }
2086   ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr);
2087   if (flg) {
2088     Viewer tviewer;
2089     ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr);
2090     ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr);
2091     ierr = ViewerFlush(tviewer); CHKERRQ(ierr);
2092     ierr = ViewerDestroy(tviewer); CHKERRQ(ierr);
2093   }
2094   return 0;
2095 }
2096 
2097 
2098 
2099