xref: /petsc/src/mat/impls/baij/seq/baij2.c (revision 4482741e5b2e2bbc854fb1f8dba65221386520f2)
1 /*$Id: baij2.c,v 1.75 2001/09/07 20:09:49 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/seq/baij.h"
4 #include "src/inline/spops.h"
5 #include "src/inline/ilu.h"
6 #include "petscbt.h"
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ"
10 int MatIncreaseOverlap_SeqBAIJ(Mat A,int is_max,IS is[],int ov)
11 {
12   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
13   int         row,i,j,k,l,m,n,*idx,ierr,*nidx,isz,val,ival;
14   int         start,end,*ai,*aj,bs,*nidx2;
15   PetscBT     table;
16 
17   PetscFunctionBegin;
18   m     = a->mbs;
19   ai    = a->i;
20   aj    = a->j;
21   bs    = a->bs;
22 
23   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
24 
25   ierr = PetscBTCreate(m,table);CHKERRQ(ierr);
26   ierr = PetscMalloc((m+1)*sizeof(int),&nidx);CHKERRQ(ierr);
27   ierr = PetscMalloc((A->m+1)*sizeof(int),&nidx2);CHKERRQ(ierr);
28 
29   for (i=0; i<is_max; i++) {
30     /* Initialise the two local arrays */
31     isz  = 0;
32     ierr = PetscBTMemzero(m,table);CHKERRQ(ierr);
33 
34     /* Extract the indices, assume there can be duplicate entries */
35     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
36     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
37 
38     /* Enter these into the temp arrays i.e mark table[row], enter row into new index */
39     for (j=0; j<n ; ++j){
40       ival = idx[j]/bs; /* convert the indices into block indices */
41       if (ival>=m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
42       if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;}
43     }
44     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
45     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
46 
47     k = 0;
48     for (j=0; j<ov; j++){ /* for each overlap*/
49       n = isz;
50       for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */
51         row   = nidx[k];
52         start = ai[row];
53         end   = ai[row+1];
54         for (l = start; l<end ; l++){
55           val = aj[l];
56           if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;}
57         }
58       }
59     }
60     /* expand the Index Set */
61     for (j=0; j<isz; j++) {
62       for (k=0; k<bs; k++)
63         nidx2[j*bs+k] = nidx[j]*bs+k;
64     }
65     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
66   }
67   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
68   ierr = PetscFree(nidx);CHKERRQ(ierr);
69   ierr = PetscFree(nidx2);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 
73 #undef __FUNCT__
74 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private"
75 int MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
76 {
77   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data,*c;
78   int          *smap,i,k,kstart,kend,ierr,oldcols = a->nbs,*lens;
79   int          row,mat_i,*mat_j,tcol,*mat_ilen;
80   int          *irow,*icol,nrows,ncols,*ssmap,bs=a->bs,bs2=a->bs2;
81   int          *aj = a->j,*ai = a->i;
82   MatScalar    *mat_a;
83   Mat          C;
84   PetscTruth   flag;
85 
86   PetscFunctionBegin;
87   ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr);
88   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
89 
90   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
91   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
92   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
93   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
94 
95   ierr = PetscMalloc((1+oldcols)*sizeof(int),&smap);CHKERRQ(ierr);
96   ssmap = smap;
97   ierr = PetscMalloc((1+nrows)*sizeof(int),&lens);CHKERRQ(ierr);
98   ierr  = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr);
99   for (i=0; i<ncols; i++) smap[icol[i]] = i+1;
100   /* determine lens of each row */
101   for (i=0; i<nrows; i++) {
102     kstart  = ai[irow[i]];
103     kend    = kstart + a->ilen[irow[i]];
104     lens[i] = 0;
105       for (k=kstart; k<kend; k++) {
106         if (ssmap[aj[k]]) {
107           lens[i]++;
108         }
109       }
110     }
111   /* Create and fill new matrix */
112   if (scall == MAT_REUSE_MATRIX) {
113     c = (Mat_SeqBAIJ *)((*B)->data);
114 
115     if (c->mbs!=nrows || c->nbs!=ncols || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
116     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr);
117     if (flag == PETSC_FALSE) {
118       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
119     }
120     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr);
121     C = *B;
122   } else {
123     ierr = MatCreate(A->comm,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE,&C);CHKERRQ(ierr);
124     ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
125     ierr = MatSeqBAIJSetPreallocation(C,bs,0,lens);CHKERRQ(ierr);
126   }
127   c = (Mat_SeqBAIJ *)(C->data);
128   for (i=0; i<nrows; i++) {
129     row    = irow[i];
130     kstart = ai[row];
131     kend   = kstart + a->ilen[row];
132     mat_i  = c->i[i];
133     mat_j  = c->j + mat_i;
134     mat_a  = c->a + mat_i*bs2;
135     mat_ilen = c->ilen + i;
136     for (k=kstart; k<kend; k++) {
137       if ((tcol=ssmap[a->j[k]])) {
138         *mat_j++ = tcol - 1;
139         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
140         mat_a   += bs2;
141         (*mat_ilen)++;
142       }
143     }
144   }
145 
146   /* Free work space */
147   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
148   ierr = PetscFree(smap);CHKERRQ(ierr);
149   ierr = PetscFree(lens);CHKERRQ(ierr);
150   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
151   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
152 
153   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
154   *B = C;
155   PetscFunctionReturn(0);
156 }
157 
158 #undef __FUNCT__
159 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ"
160 int MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
161 {
162   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
163   IS          is1,is2;
164   int         *vary,*iary,*irow,*icol,nrows,ncols,i,ierr,bs=a->bs,count;
165 
166   PetscFunctionBegin;
167   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
168   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
169   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
170   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
171 
172   /* Verify if the indices corespond to each element in a block
173    and form the IS with compressed IS */
174   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);CHKERRQ(ierr);
175   iary = vary + a->mbs;
176   ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr);
177   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
178   count = 0;
179   for (i=0; i<a->mbs; i++) {
180     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Index set does not match blocks");
181     if (vary[i]==bs) iary[count++] = i;
182   }
183   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
184 
185   ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr);
186   for (i=0; i<ncols; i++) vary[icol[i]/bs]++;
187   count = 0;
188   for (i=0; i<a->mbs; i++) {
189     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Internal error in PETSc");
190     if (vary[i]==bs) iary[count++] = i;
191   }
192   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr);
193   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
194   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
195   ierr = PetscFree(vary);CHKERRQ(ierr);
196 
197   ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,cs,scall,B);CHKERRQ(ierr);
198   ISDestroy(is1);
199   ISDestroy(is2);
200   PetscFunctionReturn(0);
201 }
202 
203 #undef __FUNCT__
204 #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ"
205 int MatGetSubMatrices_SeqBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
206 {
207   int ierr,i;
208 
209   PetscFunctionBegin;
210   if (scall == MAT_INITIAL_MATRIX) {
211     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
212   }
213 
214   for (i=0; i<n; i++) {
215     ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr);
216   }
217   PetscFunctionReturn(0);
218 }
219 
220 
221 /* -------------------------------------------------------*/
222 /* Should check that shapes of vectors and matrices match */
223 /* -------------------------------------------------------*/
224 #include "petscblaslapack.h"
225 
226 #undef __FUNCT__
227 #define __FUNCT__ "MatMult_SeqBAIJ_1"
228 int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
229 {
230   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
231   PetscScalar     *x,*z,sum;
232   MatScalar       *v;
233   int             mbs=a->mbs,i,*idx,*ii,n,ierr;
234 
235   PetscFunctionBegin;
236   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
237   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
238 
239   idx   = a->j;
240   v     = a->a;
241   ii    = a->i;
242 
243   for (i=0; i<mbs; i++) {
244     n    = ii[1] - ii[0]; ii++;
245     sum  = 0.0;
246     while (n--) sum += *v++ * x[*idx++];
247     z[i] = sum;
248   }
249   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
250   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
251   PetscLogFlops(2*a->nz - A->m);
252   PetscFunctionReturn(0);
253 }
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatMult_SeqBAIJ_2"
257 int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
258 {
259   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
260   PetscScalar     *x,*z,*xb,sum1,sum2;
261   PetscScalar     x1,x2;
262   MatScalar       *v;
263   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
264 
265   PetscFunctionBegin;
266   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
267   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
268 
269   idx   = a->j;
270   v     = a->a;
271   ii    = a->i;
272 
273   for (i=0; i<mbs; i++) {
274     n  = ii[1] - ii[0]; ii++;
275     sum1 = 0.0; sum2 = 0.0;
276     for (j=0; j<n; j++) {
277       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
278       sum1 += v[0]*x1 + v[2]*x2;
279       sum2 += v[1]*x1 + v[3]*x2;
280       v += 4;
281     }
282     z[0] = sum1; z[1] = sum2;
283     z += 2;
284   }
285   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
286   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
287   PetscLogFlops(8*a->nz - A->m);
288   PetscFunctionReturn(0);
289 }
290 
291 #undef __FUNCT__
292 #define __FUNCT__ "MatMult_SeqBAIJ_3"
293 int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
294 {
295   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
296   PetscScalar  *x,*z,*xb,sum1,sum2,sum3,x1,x2,x3;
297   MatScalar    *v;
298   int          ierr,mbs=a->mbs,i,*idx,*ii,j,n;
299 
300 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
301 #pragma disjoint(*v,*z,*xb)
302 #endif
303 
304   PetscFunctionBegin;
305   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
306   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
307 
308   idx   = a->j;
309   v     = a->a;
310   ii    = a->i;
311 
312   for (i=0; i<mbs; i++) {
313     n  = ii[1] - ii[0]; ii++;
314     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
315     for (j=0; j<n; j++) {
316       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
317       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
318       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
319       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
320       v += 9;
321     }
322     z[0] = sum1; z[1] = sum2; z[2] = sum3;
323     z += 3;
324   }
325   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
326   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
327   PetscLogFlops(18*a->nz - A->m);
328   PetscFunctionReturn(0);
329 }
330 
331 #undef __FUNCT__
332 #define __FUNCT__ "MatMult_SeqBAIJ_4"
333 int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
334 {
335   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
336   PetscScalar     *x,*z,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4;
337   MatScalar       *v;
338   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
339 
340   PetscFunctionBegin;
341   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
342   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
343 
344   idx   = a->j;
345   v     = a->a;
346   ii    = a->i;
347 
348   for (i=0; i<mbs; i++) {
349     n  = ii[1] - ii[0]; ii++;
350     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
351     for (j=0; j<n; j++) {
352       xb = x + 4*(*idx++);
353       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
354       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
355       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
356       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
357       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
358       v += 16;
359     }
360     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
361     z += 4;
362   }
363   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
364   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
365   PetscLogFlops(32*a->nz - A->m);
366   PetscFunctionReturn(0);
367 }
368 
369 #undef __FUNCT__
370 #define __FUNCT__ "MatMult_SeqBAIJ_5"
371 int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
372 {
373   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
374   PetscScalar     sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*xb,*z,*x;
375   MatScalar       *v;
376   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
377 
378   PetscFunctionBegin;
379   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
380   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
381 
382   idx   = a->j;
383   v     = a->a;
384   ii    = a->i;
385 
386   for (i=0; i<mbs; i++) {
387     n  = ii[1] - ii[0]; ii++;
388     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
389     for (j=0; j<n; j++) {
390       xb = x + 5*(*idx++);
391       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
392       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
393       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
394       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
395       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
396       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
397       v += 25;
398     }
399     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
400     z += 5;
401   }
402   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
403   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
404   PetscLogFlops(50*a->nz - A->m);
405   PetscFunctionReturn(0);
406 }
407 
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "MatMult_SeqBAIJ_6"
411 int MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz)
412 {
413   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
414   PetscScalar     *x,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
415   PetscScalar     x1,x2,x3,x4,x5,x6;
416   MatScalar       *v;
417   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
418 
419   PetscFunctionBegin;
420   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
421   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
422 
423   idx   = a->j;
424   v     = a->a;
425   ii    = a->i;
426 
427   for (i=0; i<mbs; i++) {
428     n  = ii[1] - ii[0]; ii++;
429     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0;
430     for (j=0; j<n; j++) {
431       xb = x + 6*(*idx++);
432       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
433       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
434       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
435       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
436       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
437       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
438       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
439       v += 36;
440     }
441     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
442     z += 6;
443   }
444 
445   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
446   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
447   PetscLogFlops(72*a->nz - A->m);
448   PetscFunctionReturn(0);
449 }
450 #undef __FUNCT__
451 #define __FUNCT__ "MatMult_SeqBAIJ_7"
452 int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
453 {
454   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
455   PetscScalar     *x,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
456   PetscScalar     x1,x2,x3,x4,x5,x6,x7;
457   MatScalar       *v;
458   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
459 
460   PetscFunctionBegin;
461   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
462   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
463 
464   idx   = a->j;
465   v     = a->a;
466   ii    = a->i;
467 
468   for (i=0; i<mbs; i++) {
469     n  = ii[1] - ii[0]; ii++;
470     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
471     for (j=0; j<n; j++) {
472       xb = x + 7*(*idx++);
473       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
474       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
475       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
476       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
477       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
478       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
479       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
480       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
481       v += 49;
482     }
483     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
484     z += 7;
485   }
486 
487   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
488   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
489   PetscLogFlops(98*a->nz - A->m);
490   PetscFunctionReturn(0);
491 }
492 
493 /*
494     This will not work with MatScalar == float because it calls the BLAS
495 */
496 #undef __FUNCT__
497 #define __FUNCT__ "MatMult_SeqBAIJ_N"
498 int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
499 {
500   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
501   PetscScalar     *x,*z,*xb,*work,*workt;
502   MatScalar       *v;
503   int             ierr,mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
504   int             ncols,k;
505 
506   PetscFunctionBegin;
507   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
508   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
509 
510   idx   = a->j;
511   v     = a->a;
512   ii    = a->i;
513 
514 
515   if (!a->mult_work) {
516     k    = PetscMax(A->m,A->n);
517     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
518   }
519   work = a->mult_work;
520   for (i=0; i<mbs; i++) {
521     n     = ii[1] - ii[0]; ii++;
522     ncols = n*bs;
523     workt = work;
524     for (j=0; j<n; j++) {
525       xb = x + bs*(*idx++);
526       for (k=0; k<bs; k++) workt[k] = xb[k];
527       workt += bs;
528     }
529     Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z);
530     /* LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */
531     v += n*bs2;
532     z += bs;
533   }
534   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
535   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
536   PetscLogFlops(2*a->nz*bs2 - A->m);
537   PetscFunctionReturn(0);
538 }
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatMultAdd_SeqBAIJ_1"
542 int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
543 {
544   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
545   PetscScalar     *x,*y,*z,sum;
546   MatScalar       *v;
547   int             ierr,mbs=a->mbs,i,*idx,*ii,n;
548 
549   PetscFunctionBegin;
550   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
551   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
552   if (zz != yy) {
553     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
554   } else {
555     z = y;
556   }
557 
558   idx   = a->j;
559   v     = a->a;
560   ii    = a->i;
561 
562   for (i=0; i<mbs; i++) {
563     n    = ii[1] - ii[0]; ii++;
564     sum  = y[i];
565     while (n--) sum += *v++ * x[*idx++];
566     z[i] = sum;
567   }
568   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
569   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
570   if (zz != yy) {
571     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
572   }
573   PetscLogFlops(2*a->nz);
574   PetscFunctionReturn(0);
575 }
576 
577 #undef __FUNCT__
578 #define __FUNCT__ "MatMultAdd_SeqBAIJ_2"
579 int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
580 {
581   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
582   PetscScalar     *x,*y,*z,*xb,sum1,sum2;
583   PetscScalar     x1,x2;
584   MatScalar       *v;
585   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
586 
587   PetscFunctionBegin;
588   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
589   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
590   if (zz != yy) {
591     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
592   } else {
593     z = y;
594   }
595 
596   idx   = a->j;
597   v     = a->a;
598   ii    = a->i;
599 
600   for (i=0; i<mbs; i++) {
601     n  = ii[1] - ii[0]; ii++;
602     sum1 = y[0]; sum2 = y[1];
603     for (j=0; j<n; j++) {
604       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
605       sum1 += v[0]*x1 + v[2]*x2;
606       sum2 += v[1]*x1 + v[3]*x2;
607       v += 4;
608     }
609     z[0] = sum1; z[1] = sum2;
610     z += 2; y += 2;
611   }
612   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
613   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
614   if (zz != yy) {
615     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
616   }
617   PetscLogFlops(4*a->nz);
618   PetscFunctionReturn(0);
619 }
620 
621 #undef __FUNCT__
622 #define __FUNCT__ "MatMultAdd_SeqBAIJ_3"
623 int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
624 {
625   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
626   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,x1,x2,x3;
627   MatScalar       *v;
628   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
629 
630   PetscFunctionBegin;
631   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
632   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
633   if (zz != yy) {
634     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
635   } else {
636     z = y;
637   }
638 
639   idx   = a->j;
640   v     = a->a;
641   ii    = a->i;
642 
643   for (i=0; i<mbs; i++) {
644     n  = ii[1] - ii[0]; ii++;
645     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
646     for (j=0; j<n; j++) {
647       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
648       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
649       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
650       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
651       v += 9;
652     }
653     z[0] = sum1; z[1] = sum2; z[2] = sum3;
654     z += 3; y += 3;
655   }
656   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
657   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
658   if (zz != yy) {
659     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
660   }
661   PetscLogFlops(18*a->nz);
662   PetscFunctionReturn(0);
663 }
664 
665 #undef __FUNCT__
666 #define __FUNCT__ "MatMultAdd_SeqBAIJ_4"
667 int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
668 {
669   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
670   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4;
671   MatScalar       *v;
672   int             ierr,mbs=a->mbs,i,*idx,*ii;
673   int             j,n;
674 
675   PetscFunctionBegin;
676   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
677   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
678   if (zz != yy) {
679     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
680   } else {
681     z = y;
682   }
683 
684   idx   = a->j;
685   v     = a->a;
686   ii    = a->i;
687 
688   for (i=0; i<mbs; i++) {
689     n  = ii[1] - ii[0]; ii++;
690     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
691     for (j=0; j<n; j++) {
692       xb = x + 4*(*idx++);
693       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
694       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
695       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
696       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
697       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
698       v += 16;
699     }
700     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
701     z += 4; y += 4;
702   }
703   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
704   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
705   if (zz != yy) {
706     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
707   }
708   PetscLogFlops(32*a->nz);
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNCT__
713 #define __FUNCT__ "MatMultAdd_SeqBAIJ_5"
714 int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
715 {
716   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
717   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
718   MatScalar       *v;
719   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
720 
721   PetscFunctionBegin;
722   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
723   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
724   if (zz != yy) {
725     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
726   } else {
727     z = y;
728   }
729 
730   idx   = a->j;
731   v     = a->a;
732   ii    = a->i;
733 
734   for (i=0; i<mbs; i++) {
735     n  = ii[1] - ii[0]; ii++;
736     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
737     for (j=0; j<n; j++) {
738       xb = x + 5*(*idx++);
739       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
740       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
741       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
742       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
743       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
744       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
745       v += 25;
746     }
747     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
748     z += 5; y += 5;
749   }
750   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
751   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
752   if (zz != yy) {
753     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
754   }
755   PetscLogFlops(50*a->nz);
756   PetscFunctionReturn(0);
757 }
758 #undef __FUNCT__
759 #define __FUNCT__ "MatMultAdd_SeqBAIJ_6"
760 int MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
761 {
762   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
763   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
764   PetscScalar     x1,x2,x3,x4,x5,x6;
765   MatScalar       *v;
766   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
767 
768   PetscFunctionBegin;
769   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
770   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
771   if (zz != yy) {
772     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
773   } else {
774     z = y;
775   }
776 
777   idx   = a->j;
778   v     = a->a;
779   ii    = a->i;
780 
781   for (i=0; i<mbs; i++) {
782     n  = ii[1] - ii[0]; ii++;
783     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5];
784     for (j=0; j<n; j++) {
785       xb = x + 6*(*idx++);
786       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
787       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
788       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
789       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
790       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
791       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
792       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
793       v += 36;
794     }
795     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
796     z += 6; y += 6;
797   }
798   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
799   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
800   if (zz != yy) {
801     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
802   }
803   PetscLogFlops(72*a->nz);
804   PetscFunctionReturn(0);
805 }
806 
807 #undef __FUNCT__
808 #define __FUNCT__ "MatMultAdd_SeqBAIJ_7"
809 int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
810 {
811   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
812   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
813   PetscScalar     x1,x2,x3,x4,x5,x6,x7;
814   MatScalar       *v;
815   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
816 
817   PetscFunctionBegin;
818   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
819   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
820   if (zz != yy) {
821     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
822   } else {
823     z = y;
824   }
825 
826   idx   = a->j;
827   v     = a->a;
828   ii    = a->i;
829 
830   for (i=0; i<mbs; i++) {
831     n  = ii[1] - ii[0]; ii++;
832     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
833     for (j=0; j<n; j++) {
834       xb = x + 7*(*idx++);
835       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
836       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
837       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
838       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
839       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
840       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
841       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
842       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
843       v += 49;
844     }
845     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
846     z += 7; y += 7;
847   }
848   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
849   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
850   if (zz != yy) {
851     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
852   }
853   PetscLogFlops(98*a->nz);
854   PetscFunctionReturn(0);
855 }
856 
857 #undef __FUNCT__
858 #define __FUNCT__ "MatMultAdd_SeqBAIJ_N"
859 int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
860 {
861   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
862   PetscScalar    *x,*z,*xb,*work,*workt,*y;
863   MatScalar      *v;
864   int            mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
865   int            ncols,k;
866 
867   PetscFunctionBegin;
868   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
869   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
870   if (zz != yy) {
871     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
872     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
873     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
874   }
875 
876   idx   = a->j;
877   v     = a->a;
878   ii    = a->i;
879 
880 
881   if (!a->mult_work) {
882     k    = PetscMax(A->m,A->n);
883     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
884   }
885   work = a->mult_work;
886   for (i=0; i<mbs; i++) {
887     n     = ii[1] - ii[0]; ii++;
888     ncols = n*bs;
889     workt = work;
890     for (j=0; j<n; j++) {
891       xb = x + bs*(*idx++);
892       for (k=0; k<bs; k++) workt[k] = xb[k];
893       workt += bs;
894     }
895     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
896     /* LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */
897     v += n*bs2;
898     z += bs;
899   }
900   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
901   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
902   PetscLogFlops(2*a->nz*bs2);
903   PetscFunctionReturn(0);
904 }
905 
906 #undef __FUNCT__
907 #define __FUNCT__ "MatMultTranspose_SeqBAIJ"
908 int MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz)
909 {
910   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
911   PetscScalar     *xg,*zg,*zb,zero = 0.0;
912   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
913   MatScalar       *v;
914   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval;
915   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
916 
917   PetscFunctionBegin;
918   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
919   ierr = VecGetArray(xx,&xg);CHKERRQ(ierr); x = xg;
920   ierr = VecGetArray(zz,&zg);CHKERRQ(ierr); z = zg;
921 
922   idx   = a->j;
923   v     = a->a;
924   ii    = a->i;
925   xb    = x;
926   switch (bs) {
927   case 1:
928     for (i=0; i<mbs; i++) {
929       n  = ii[1] - ii[0]; ii++;
930       x1 = xb[0];
931       ib = idx + ai[i];
932       for (j=0; j<n; j++) {
933         rval    = ib[j];
934         z[rval] += *v * x1;
935         v++;
936       }
937       xb++;
938     }
939     break;
940   case 2:
941     for (i=0; i<mbs; i++) {
942       n  = ii[1] - ii[0]; ii++;
943       x1 = xb[0]; x2 = xb[1];
944       ib = idx + ai[i];
945       for (j=0; j<n; j++) {
946         rval      = ib[j]*2;
947         z[rval++] += v[0]*x1 + v[1]*x2;
948         z[rval]   += v[2]*x1 + v[3]*x2;
949         v  += 4;
950       }
951       xb += 2;
952     }
953     break;
954   case 3:
955     for (i=0; i<mbs; i++) {
956       n  = ii[1] - ii[0]; ii++;
957       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
958       ib = idx + ai[i];
959       for (j=0; j<n; j++) {
960         rval      = ib[j]*3;
961         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
962         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
963         z[rval]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
964         v  += 9;
965       }
966       xb += 3;
967     }
968     break;
969   case 4:
970     for (i=0; i<mbs; i++) {
971       n  = ii[1] - ii[0]; ii++;
972       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
973       ib = idx + ai[i];
974       for (j=0; j<n; j++) {
975         rval      = ib[j]*4;
976         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
977         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
978         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
979         z[rval]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
980         v  += 16;
981       }
982       xb += 4;
983     }
984     break;
985   case 5:
986     for (i=0; i<mbs; i++) {
987       n  = ii[1] - ii[0]; ii++;
988       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
989       x4 = xb[3]; x5 = xb[4];
990       ib = idx + ai[i];
991       for (j=0; j<n; j++) {
992         rval      = ib[j]*5;
993         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
994         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
995         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
996         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
997         z[rval]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
998         v  += 25;
999       }
1000       xb += 5;
1001     }
1002     break;
1003   case 6:
1004     for (i=0; i<mbs; i++) {
1005       n  = ii[1] - ii[0]; ii++;
1006       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1007       x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
1008       ib = idx + ai[i];
1009       for (j=0; j<n; j++) {
1010         rval      = ib[j]*6;
1011         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 + v[4]*x5 + v[5]*x6;
1012         z[rval++] +=  v[6]*x1 +  v[7]*x2 +  v[8]*x3 +  v[9]*x4 + v[10]*x5 + v[11]*x6;
1013         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6;
1014         z[rval++] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6;
1015         z[rval++] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6;
1016         z[rval]   += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6;
1017         v  += 36;
1018       }
1019       xb += 6;
1020     }
1021     break;
1022   case 7:
1023     for (i=0; i<mbs; i++) {
1024       n  = ii[1] - ii[0]; ii++;
1025       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1026       x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1027       ib = idx + ai[i];
1028       for (j=0; j<n; j++) {
1029         rval      = ib[j]*7;
1030         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 + v[4]*x5 + v[5]*x6 + v[6]*x7;
1031         z[rval++] +=  v[7]*x1 +  v[8]*x2 +  v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7;
1032         z[rval++] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7;
1033         z[rval++] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7;
1034         z[rval++] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7;
1035         z[rval++] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7;
1036         z[rval]   += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7;
1037         v  += 49;
1038       }
1039       xb += 7;
1040     }
1041     break;
1042   default: {       /* block sizes larger then 7 by 7 are handled by BLAS */
1043       int          ncols,k;
1044       PetscScalar  *work,*workt;
1045 
1046       if (!a->mult_work) {
1047         k = PetscMax(A->m,A->n);
1048         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1049       }
1050       work = a->mult_work;
1051       for (i=0; i<mbs; i++) {
1052         n     = ii[1] - ii[0]; ii++;
1053         ncols = n*bs;
1054         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1055         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,work);
1056         /* LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); */
1057         v += n*bs2;
1058         x += bs;
1059         workt = work;
1060         for (j=0; j<n; j++) {
1061           zb = z + bs*(*idx++);
1062           for (k=0; k<bs; k++) zb[k] += workt[k] ;
1063           workt += bs;
1064         }
1065       }
1066     }
1067   }
1068   ierr = VecRestoreArray(xx,&xg);CHKERRQ(ierr);
1069   ierr = VecRestoreArray(zz,&zg);CHKERRQ(ierr);
1070   PetscLogFlops(2*a->nz*a->bs2 - A->n);
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 #undef __FUNCT__
1075 #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ"
1076 int MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1077 
1078 {
1079   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1080   PetscScalar     *xg,*zg,*zb,*x,*z,*xb,x1,x2,x3,x4,x5;
1081   MatScalar       *v;
1082   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1083 
1084   PetscFunctionBegin;
1085   if (yy != zz) { ierr = VecCopy(yy,zz);CHKERRQ(ierr); }
1086   ierr = VecGetArray(xx,&xg);CHKERRQ(ierr); x = xg;
1087   ierr = VecGetArray(zz,&zg);CHKERRQ(ierr); z = zg;
1088 
1089 
1090   idx   = a->j;
1091   v     = a->a;
1092   ii    = a->i;
1093   xb    = x;
1094 
1095   switch (bs) {
1096   case 1:
1097     for (i=0; i<mbs; i++) {
1098       n  = ii[1] - ii[0]; ii++;
1099       x1 = xb[0];
1100       ib = idx + ai[i];
1101       for (j=0; j<n; j++) {
1102         rval    = ib[j];
1103         z[rval] += *v * x1;
1104         v++;
1105       }
1106       xb++;
1107     }
1108     break;
1109   case 2:
1110     for (i=0; i<mbs; i++) {
1111       n  = ii[1] - ii[0]; ii++;
1112       x1 = xb[0]; x2 = xb[1];
1113       ib = idx + ai[i];
1114       for (j=0; j<n; j++) {
1115         rval      = ib[j]*2;
1116         z[rval++] += v[0]*x1 + v[1]*x2;
1117         z[rval++] += v[2]*x1 + v[3]*x2;
1118         v  += 4;
1119       }
1120       xb += 2;
1121     }
1122     break;
1123   case 3:
1124     for (i=0; i<mbs; i++) {
1125       n  = ii[1] - ii[0]; ii++;
1126       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1127       ib = idx + ai[i];
1128       for (j=0; j<n; j++) {
1129         rval      = ib[j]*3;
1130         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1131         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1132         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1133         v  += 9;
1134       }
1135       xb += 3;
1136     }
1137     break;
1138   case 4:
1139     for (i=0; i<mbs; i++) {
1140       n  = ii[1] - ii[0]; ii++;
1141       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1142       ib = idx + ai[i];
1143       for (j=0; j<n; j++) {
1144         rval      = ib[j]*4;
1145         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1146         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1147         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1148         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1149         v  += 16;
1150       }
1151       xb += 4;
1152     }
1153     break;
1154   case 5:
1155     for (i=0; i<mbs; i++) {
1156       n  = ii[1] - ii[0]; ii++;
1157       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1158       x4 = xb[3]; x5 = xb[4];
1159       ib = idx + ai[i];
1160       for (j=0; j<n; j++) {
1161         rval      = ib[j]*5;
1162         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1163         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1164         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1165         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1166         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1167         v  += 25;
1168       }
1169       xb += 5;
1170     }
1171     break;
1172   default: {      /* block sizes larger then 5 by 5 are handled by BLAS */
1173       int          ncols,k;
1174       PetscScalar  *work,*workt;
1175 
1176       if (!a->mult_work) {
1177         k = PetscMax(A->m,A->n);
1178         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1179       }
1180       work = a->mult_work;
1181       for (i=0; i<mbs; i++) {
1182         n     = ii[1] - ii[0]; ii++;
1183         ncols = n*bs;
1184         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1185         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,work);
1186         /* LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); */
1187         v += n*bs2;
1188         x += bs;
1189         workt = work;
1190         for (j=0; j<n; j++) {
1191           zb = z + bs*(*idx++);
1192           for (k=0; k<bs; k++) zb[k] += workt[k] ;
1193           workt += bs;
1194         }
1195       }
1196     }
1197   }
1198   ierr = VecRestoreArray(xx,&xg);CHKERRQ(ierr);
1199   ierr = VecRestoreArray(zz,&zg);CHKERRQ(ierr);
1200   PetscLogFlops(2*a->nz*a->bs2);
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 #undef __FUNCT__
1205 #define __FUNCT__ "MatScale_SeqBAIJ"
1206 int MatScale_SeqBAIJ(const PetscScalar *alpha,Mat inA)
1207 {
1208   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1209   int         totalnz = a->bs2*a->nz;
1210 #if defined(PETSC_USE_MAT_SINGLE)
1211   int         i;
1212 #else
1213   int         one = 1;
1214 #endif
1215 
1216   PetscFunctionBegin;
1217 #if defined(PETSC_USE_MAT_SINGLE)
1218   for (i=0; i<totalnz; i++) a->a[i] *= *alpha;
1219 #else
1220   BLscal_(&totalnz,(PetscScalar*)alpha,a->a,&one);
1221 #endif
1222   PetscLogFlops(totalnz);
1223   PetscFunctionReturn(0);
1224 }
1225 
1226 #undef __FUNCT__
1227 #define __FUNCT__ "MatNorm_SeqBAIJ"
1228 int MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm)
1229 {
1230   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1231   MatScalar   *v = a->a;
1232   PetscReal   sum = 0.0;
1233   int         i,j,k,bs = a->bs,nz=a->nz,bs2=a->bs2,k1;
1234 
1235   PetscFunctionBegin;
1236   if (type == NORM_FROBENIUS) {
1237     for (i=0; i< bs2*nz; i++) {
1238 #if defined(PETSC_USE_COMPLEX)
1239       sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1240 #else
1241       sum += (*v)*(*v); v++;
1242 #endif
1243     }
1244     *norm = sqrt(sum);
1245   }  else if (type == NORM_INFINITY) { /* maximum row sum */
1246     *norm = 0.0;
1247     for (k=0; k<bs; k++) {
1248       for (j=0; j<a->mbs; j++) {
1249         v = a->a + bs2*a->i[j] + k;
1250         sum = 0.0;
1251         for (i=0; i<a->i[j+1]-a->i[j]; i++) {
1252           for (k1=0; k1<bs; k1++){
1253             sum += PetscAbsScalar(*v);
1254             v   += bs;
1255           }
1256         }
1257         if (sum > *norm) *norm = sum;
1258       }
1259     }
1260   } else {
1261     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1262   }
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 
1267 #undef __FUNCT__
1268 #define __FUNCT__ "MatEqual_SeqBAIJ"
1269 int MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg)
1270 {
1271   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data;
1272   int         ierr;
1273 
1274   PetscFunctionBegin;
1275   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1276   if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->nz != b->nz)) {
1277     *flg = PETSC_FALSE;
1278     PetscFunctionReturn(0);
1279   }
1280 
1281   /* if the a->i are the same */
1282   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr);
1283   if (*flg == PETSC_FALSE) {
1284     PetscFunctionReturn(0);
1285   }
1286 
1287   /* if a->j are the same */
1288   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int),flg);CHKERRQ(ierr);
1289   if (*flg == PETSC_FALSE) {
1290     PetscFunctionReturn(0);
1291   }
1292   /* if a->a are the same */
1293   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1294   PetscFunctionReturn(0);
1295 
1296 }
1297 
1298 #undef __FUNCT__
1299 #define __FUNCT__ "MatGetDiagonal_SeqBAIJ"
1300 int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1301 {
1302   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1303   int          ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1304   PetscScalar  *x,zero = 0.0;
1305   MatScalar    *aa,*aa_j;
1306 
1307   PetscFunctionBegin;
1308   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1309   bs   = a->bs;
1310   aa   = a->a;
1311   ai   = a->i;
1312   aj   = a->j;
1313   ambs = a->mbs;
1314   bs2  = a->bs2;
1315 
1316   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1317   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1318   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1319   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1320   for (i=0; i<ambs; i++) {
1321     for (j=ai[i]; j<ai[i+1]; j++) {
1322       if (aj[j] == i) {
1323         row  = i*bs;
1324         aa_j = aa+j*bs2;
1325         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1326         break;
1327       }
1328     }
1329   }
1330   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 #undef __FUNCT__
1335 #define __FUNCT__ "MatDiagonalScale_SeqBAIJ"
1336 int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1337 {
1338   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1339   PetscScalar  *l,*r,x,*li,*ri;
1340   MatScalar    *aa,*v;
1341   int          ierr,i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1342 
1343   PetscFunctionBegin;
1344   ai  = a->i;
1345   aj  = a->j;
1346   aa  = a->a;
1347   m   = A->m;
1348   n   = A->n;
1349   bs  = a->bs;
1350   mbs = a->mbs;
1351   bs2 = a->bs2;
1352   if (ll) {
1353     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
1354     ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1355     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1356     for (i=0; i<mbs; i++) { /* for each block row */
1357       M  = ai[i+1] - ai[i];
1358       li = l + i*bs;
1359       v  = aa + bs2*ai[i];
1360       for (j=0; j<M; j++) { /* for each block */
1361         for (k=0; k<bs2; k++) {
1362           (*v++) *= li[k%bs];
1363         }
1364       }
1365     }
1366     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1367     PetscLogFlops(a->nz);
1368   }
1369 
1370   if (rr) {
1371     ierr = VecGetArray(rr,&r);CHKERRQ(ierr);
1372     ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr);
1373     if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
1374     for (i=0; i<mbs; i++) { /* for each block row */
1375       M  = ai[i+1] - ai[i];
1376       v  = aa + bs2*ai[i];
1377       for (j=0; j<M; j++) { /* for each block */
1378         ri = r + bs*aj[ai[i]+j];
1379         for (k=0; k<bs; k++) {
1380           x = ri[k];
1381           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
1382         }
1383       }
1384     }
1385     ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr);
1386     PetscLogFlops(a->nz);
1387   }
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 
1392 #undef __FUNCT__
1393 #define __FUNCT__ "MatGetInfo_SeqBAIJ"
1394 int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1395 {
1396   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1397 
1398   PetscFunctionBegin;
1399   info->rows_global    = (double)A->m;
1400   info->columns_global = (double)A->n;
1401   info->rows_local     = (double)A->m;
1402   info->columns_local  = (double)A->n;
1403   info->block_size     = a->bs2;
1404   info->nz_allocated   = a->maxnz;
1405   info->nz_used        = a->bs2*a->nz;
1406   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1407   info->assemblies   = A->num_ass;
1408   info->mallocs      = a->reallocs;
1409   info->memory       = A->mem;
1410   if (A->factor) {
1411     info->fill_ratio_given  = A->info.fill_ratio_given;
1412     info->fill_ratio_needed = A->info.fill_ratio_needed;
1413     info->factor_mallocs    = A->info.factor_mallocs;
1414   } else {
1415     info->fill_ratio_given  = 0;
1416     info->fill_ratio_needed = 0;
1417     info->factor_mallocs    = 0;
1418   }
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatZeroEntries_SeqBAIJ"
1425 int MatZeroEntries_SeqBAIJ(Mat A)
1426 {
1427   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1428   int         ierr;
1429 
1430   PetscFunctionBegin;
1431   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
1432   PetscFunctionReturn(0);
1433 }
1434