xref: /petsc/src/mat/impls/baij/seq/baij2.c (revision 5ed2d596fd9914de76abbaed56c65f4792ae57ae)
1 /*$Id: baij2.c,v 1.75 2001/09/07 20:09:49 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/seq/baij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 #include "src/inline/ilu.h"
7 #include "petscbt.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ"
11 int MatIncreaseOverlap_SeqBAIJ(Mat A,int is_max,IS *is,int ov)
12 {
13   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
14   int         row,i,j,k,l,m,n,*idx,ierr,*nidx,isz,val,ival;
15   int         start,end,*ai,*aj,bs,*nidx2;
16   PetscBT     table;
17 
18   PetscFunctionBegin;
19   m     = a->mbs;
20   ai    = a->i;
21   aj    = a->j;
22   bs    = a->bs;
23 
24   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
25 
26   ierr = PetscBTCreate(m,table);CHKERRQ(ierr);
27   ierr = PetscMalloc((m+1)*sizeof(int),&nidx);CHKERRQ(ierr);
28   ierr = PetscMalloc((A->m+1)*sizeof(int),&nidx2);CHKERRQ(ierr);
29 
30   for (i=0; i<is_max; i++) {
31     /* Initialise the two local arrays */
32     isz  = 0;
33     ierr = PetscBTMemzero(m,table);CHKERRQ(ierr);
34 
35     /* Extract the indices, assume there can be duplicate entries */
36     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
37     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
38 
39     /* Enter these into the temp arrays i.e mark table[row], enter row into new index */
40     for (j=0; j<n ; ++j){
41       ival = idx[j]/bs; /* convert the indices into block indices */
42       if (ival>m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
43       if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;}
44     }
45     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
46     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
47 
48     k = 0;
49     for (j=0; j<ov; j++){ /* for each overlap*/
50       n = isz;
51       for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */
52         row   = nidx[k];
53         start = ai[row];
54         end   = ai[row+1];
55         for (l = start; l<end ; l++){
56           val = aj[l];
57           if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;}
58         }
59       }
60     }
61     /* expand the Index Set */
62     for (j=0; j<isz; j++) {
63       for (k=0; k<bs; k++)
64         nidx2[j*bs+k] = nidx[j]*bs+k;
65     }
66     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
67   }
68   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
69   ierr = PetscFree(nidx);CHKERRQ(ierr);
70   ierr = PetscFree(nidx2);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private"
76 int MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
77 {
78   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data,*c;
79   int          *smap,i,k,kstart,kend,ierr,oldcols = a->nbs,*lens;
80   int          row,mat_i,*mat_j,tcol,*mat_ilen;
81   int          *irow,*icol,nrows,ncols,*ssmap,bs=a->bs,bs2=a->bs2;
82   int          *aj = a->j,*ai = a->i;
83   MatScalar    *mat_a;
84   Mat          C;
85   PetscTruth   flag;
86 
87   PetscFunctionBegin;
88   ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr);
89   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
90 
91   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
92   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
93   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
94   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
95 
96   ierr = PetscMalloc((1+oldcols)*sizeof(int),&smap);CHKERRQ(ierr);
97   ssmap = smap;
98   ierr = PetscMalloc((1+nrows)*sizeof(int),&lens);CHKERRQ(ierr);
99   ierr  = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr);
100   for (i=0; i<ncols; i++) smap[icol[i]] = i+1;
101   /* determine lens of each row */
102   for (i=0; i<nrows; i++) {
103     kstart  = ai[irow[i]];
104     kend    = kstart + a->ilen[irow[i]];
105     lens[i] = 0;
106       for (k=kstart; k<kend; k++) {
107         if (ssmap[aj[k]]) {
108           lens[i]++;
109         }
110       }
111     }
112   /* Create and fill new matrix */
113   if (scall == MAT_REUSE_MATRIX) {
114     c = (Mat_SeqBAIJ *)((*B)->data);
115 
116     if (c->mbs!=nrows || c->nbs!=ncols || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
117     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr);
118     if (flag == PETSC_FALSE) {
119       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
120     }
121     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr);
122     C = *B;
123   } else {
124     ierr = MatCreateSeqBAIJ(A->comm,bs,nrows*bs,ncols*bs,0,lens,&C);CHKERRQ(ierr);
125   }
126   c = (Mat_SeqBAIJ *)(C->data);
127   for (i=0; i<nrows; i++) {
128     row    = irow[i];
129     kstart = ai[row];
130     kend   = kstart + a->ilen[row];
131     mat_i  = c->i[i];
132     mat_j  = c->j + mat_i;
133     mat_a  = c->a + mat_i*bs2;
134     mat_ilen = c->ilen + i;
135     for (k=kstart; k<kend; k++) {
136       if ((tcol=ssmap[a->j[k]])) {
137         *mat_j++ = tcol - 1;
138         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
139         mat_a   += bs2;
140         (*mat_ilen)++;
141       }
142     }
143   }
144 
145   /* Free work space */
146   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
147   ierr = PetscFree(smap);CHKERRQ(ierr);
148   ierr = PetscFree(lens);CHKERRQ(ierr);
149   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
150   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
151 
152   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
153   *B = C;
154   PetscFunctionReturn(0);
155 }
156 
157 #undef __FUNCT__
158 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ"
159 int MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
160 {
161   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
162   IS          is1,is2;
163   int         *vary,*iary,*irow,*icol,nrows,ncols,i,ierr,bs=a->bs,count;
164 
165   PetscFunctionBegin;
166   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
167   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
168   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
169   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
170 
171   /* Verify if the indices corespond to each element in a block
172    and form the IS with compressed IS */
173   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);CHKERRQ(ierr);
174   iary = vary + a->mbs;
175   ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr);
176   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
177   count = 0;
178   for (i=0; i<a->mbs; i++) {
179     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Index set does not match blocks");
180     if (vary[i]==bs) iary[count++] = i;
181   }
182   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
183 
184   ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr);
185   for (i=0; i<ncols; i++) vary[icol[i]/bs]++;
186   count = 0;
187   for (i=0; i<a->mbs; i++) {
188     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Internal error in PETSc");
189     if (vary[i]==bs) iary[count++] = i;
190   }
191   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr);
192   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
193   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
194   ierr = PetscFree(vary);CHKERRQ(ierr);
195 
196   ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,cs,scall,B);CHKERRQ(ierr);
197   ISDestroy(is1);
198   ISDestroy(is2);
199   PetscFunctionReturn(0);
200 }
201 
202 #undef __FUNCT__
203 #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ"
204 int MatGetSubMatrices_SeqBAIJ(Mat A,int n,IS *irow,IS *icol,MatReuse scall,Mat **B)
205 {
206   int ierr,i;
207 
208   PetscFunctionBegin;
209   if (scall == MAT_INITIAL_MATRIX) {
210     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
211   }
212 
213   for (i=0; i<n; i++) {
214     ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr);
215   }
216   PetscFunctionReturn(0);
217 }
218 
219 
220 /* -------------------------------------------------------*/
221 /* Should check that shapes of vectors and matrices match */
222 /* -------------------------------------------------------*/
223 #include "petscblaslapack.h"
224 
225 #undef __FUNCT__
226 #define __FUNCT__ "MatMult_SeqBAIJ_1"
227 int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
228 {
229   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
230   PetscScalar     *x,*z,sum;
231   MatScalar       *v;
232   int             mbs=a->mbs,i,*idx,*ii,n,ierr;
233 
234   PetscFunctionBegin;
235   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
236   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
237 
238   idx   = a->j;
239   v     = a->a;
240   ii    = a->i;
241 
242   for (i=0; i<mbs; i++) {
243     n    = ii[1] - ii[0]; ii++;
244     sum  = 0.0;
245     while (n--) sum += *v++ * x[*idx++];
246     z[i] = sum;
247   }
248   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
249   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
250   PetscLogFlops(2*a->nz - A->m);
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatMult_SeqBAIJ_2"
256 int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
257 {
258   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
259   PetscScalar     *x,*z,*xb,sum1,sum2;
260   PetscScalar     x1,x2;
261   MatScalar       *v;
262   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
263 
264   PetscFunctionBegin;
265   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
266   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
267 
268   idx   = a->j;
269   v     = a->a;
270   ii    = a->i;
271 
272   for (i=0; i<mbs; i++) {
273     n  = ii[1] - ii[0]; ii++;
274     sum1 = 0.0; sum2 = 0.0;
275     for (j=0; j<n; j++) {
276       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
277       sum1 += v[0]*x1 + v[2]*x2;
278       sum2 += v[1]*x1 + v[3]*x2;
279       v += 4;
280     }
281     z[0] = sum1; z[1] = sum2;
282     z += 2;
283   }
284   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
285   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
286   PetscLogFlops(8*a->nz - A->m);
287   PetscFunctionReturn(0);
288 }
289 
290 #undef __FUNCT__
291 #define __FUNCT__ "MatMult_SeqBAIJ_3"
292 int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
293 {
294   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
295   PetscScalar  *x,*z,*xb,sum1,sum2,sum3,x1,x2,x3;
296   MatScalar    *v;
297   int          ierr,mbs=a->mbs,i,*idx,*ii,j,n;
298 
299 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
300 #pragma disjoint(*v,*z,*xb)
301 #endif
302 
303   PetscFunctionBegin;
304   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
305   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
306 
307   idx   = a->j;
308   v     = a->a;
309   ii    = a->i;
310 
311   for (i=0; i<mbs; i++) {
312     n  = ii[1] - ii[0]; ii++;
313     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
314     for (j=0; j<n; j++) {
315       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
316       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
317       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
318       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
319       v += 9;
320     }
321     z[0] = sum1; z[1] = sum2; z[2] = sum3;
322     z += 3;
323   }
324   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
325   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
326   PetscLogFlops(18*a->nz - A->m);
327   PetscFunctionReturn(0);
328 }
329 
330 #undef __FUNCT__
331 #define __FUNCT__ "MatMult_SeqBAIJ_4"
332 int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
333 {
334   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
335   PetscScalar     *x,*z,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4;
336   MatScalar       *v;
337   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
338 
339   PetscFunctionBegin;
340   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
341   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
342 
343   idx   = a->j;
344   v     = a->a;
345   ii    = a->i;
346 
347   for (i=0; i<mbs; i++) {
348     n  = ii[1] - ii[0]; ii++;
349     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
350     for (j=0; j<n; j++) {
351       xb = x + 4*(*idx++);
352       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
353       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
354       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
355       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
356       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
357       v += 16;
358     }
359     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
360     z += 4;
361   }
362   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
363   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
364   PetscLogFlops(32*a->nz - A->m);
365   PetscFunctionReturn(0);
366 }
367 
368 #undef __FUNCT__
369 #define __FUNCT__ "MatMult_SeqBAIJ_5"
370 int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
371 {
372   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
373   PetscScalar     sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*xb,*z,*x;
374   MatScalar       *v;
375   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
376 
377   PetscFunctionBegin;
378   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
379   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
380 
381   idx   = a->j;
382   v     = a->a;
383   ii    = a->i;
384 
385   for (i=0; i<mbs; i++) {
386     n  = ii[1] - ii[0]; ii++;
387     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
388     for (j=0; j<n; j++) {
389       xb = x + 5*(*idx++);
390       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
391       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
392       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
393       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
394       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
395       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
396       v += 25;
397     }
398     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
399     z += 5;
400   }
401   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
402   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
403   PetscLogFlops(50*a->nz - A->m);
404   PetscFunctionReturn(0);
405 }
406 
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatMult_SeqBAIJ_6"
410 int MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz)
411 {
412   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
413   PetscScalar     *x,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
414   PetscScalar     x1,x2,x3,x4,x5,x6;
415   MatScalar       *v;
416   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
417 
418   PetscFunctionBegin;
419   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
420   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
421 
422   idx   = a->j;
423   v     = a->a;
424   ii    = a->i;
425 
426   for (i=0; i<mbs; i++) {
427     n  = ii[1] - ii[0]; ii++;
428     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0;
429     for (j=0; j<n; j++) {
430       xb = x + 6*(*idx++);
431       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
432       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
433       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
434       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
435       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
436       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
437       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
438       v += 36;
439     }
440     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
441     z += 6;
442   }
443 
444   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
445   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
446   PetscLogFlops(72*a->nz - A->m);
447   PetscFunctionReturn(0);
448 }
449 #undef __FUNCT__
450 #define __FUNCT__ "MatMult_SeqBAIJ_7"
451 int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
452 {
453   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
454   PetscScalar     *x,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
455   PetscScalar     x1,x2,x3,x4,x5,x6,x7;
456   MatScalar       *v;
457   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
458 
459   PetscFunctionBegin;
460   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
461   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
462 
463   idx   = a->j;
464   v     = a->a;
465   ii    = a->i;
466 
467   for (i=0; i<mbs; i++) {
468     n  = ii[1] - ii[0]; ii++;
469     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
470     for (j=0; j<n; j++) {
471       xb = x + 7*(*idx++);
472       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
473       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
474       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
475       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
476       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
477       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
478       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
479       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
480       v += 49;
481     }
482     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
483     z += 7;
484   }
485 
486   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
487   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
488   PetscLogFlops(98*a->nz - A->m);
489   PetscFunctionReturn(0);
490 }
491 
492 /*
493     This will not work with MatScalar == float because it calls the BLAS
494 */
495 #undef __FUNCT__
496 #define __FUNCT__ "MatMult_SeqBAIJ_N"
497 int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
498 {
499   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
500   PetscScalar     *x,*z,*xb,*work,*workt;
501   MatScalar       *v;
502   int             ierr,mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
503   int             ncols,k;
504 
505   PetscFunctionBegin;
506   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
507   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
508 
509   idx   = a->j;
510   v     = a->a;
511   ii    = a->i;
512 
513 
514   if (!a->mult_work) {
515     k    = PetscMax(A->m,A->n);
516     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
517   }
518   work = a->mult_work;
519   for (i=0; i<mbs; i++) {
520     n     = ii[1] - ii[0]; ii++;
521     ncols = n*bs;
522     workt = work;
523     for (j=0; j<n; j++) {
524       xb = x + bs*(*idx++);
525       for (k=0; k<bs; k++) workt[k] = xb[k];
526       workt += bs;
527     }
528     Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z);
529     /* LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */
530     v += n*bs2;
531     z += bs;
532   }
533   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
534   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
535   PetscLogFlops(2*a->nz*bs2 - A->m);
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "MatMultAdd_SeqBAIJ_1"
541 int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
542 {
543   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
544   PetscScalar     *x,*y,*z,sum;
545   MatScalar       *v;
546   int             ierr,mbs=a->mbs,i,*idx,*ii,n;
547 
548   PetscFunctionBegin;
549   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
550   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
551   if (zz != yy) {
552     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
553   } else {
554     z = y;
555   }
556 
557   idx   = a->j;
558   v     = a->a;
559   ii    = a->i;
560 
561   for (i=0; i<mbs; i++) {
562     n    = ii[1] - ii[0]; ii++;
563     sum  = y[i];
564     while (n--) sum += *v++ * x[*idx++];
565     z[i] = sum;
566   }
567   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
568   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
569   if (zz != yy) {
570     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
571   }
572   PetscLogFlops(2*a->nz);
573   PetscFunctionReturn(0);
574 }
575 
576 #undef __FUNCT__
577 #define __FUNCT__ "MatMultAdd_SeqBAIJ_2"
578 int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
579 {
580   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
581   PetscScalar     *x,*y,*z,*xb,sum1,sum2;
582   PetscScalar     x1,x2;
583   MatScalar       *v;
584   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
585 
586   PetscFunctionBegin;
587   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
588   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
589   if (zz != yy) {
590     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
591   } else {
592     z = y;
593   }
594 
595   idx   = a->j;
596   v     = a->a;
597   ii    = a->i;
598 
599   for (i=0; i<mbs; i++) {
600     n  = ii[1] - ii[0]; ii++;
601     sum1 = y[0]; sum2 = y[1];
602     for (j=0; j<n; j++) {
603       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
604       sum1 += v[0]*x1 + v[2]*x2;
605       sum2 += v[1]*x1 + v[3]*x2;
606       v += 4;
607     }
608     z[0] = sum1; z[1] = sum2;
609     z += 2; y += 2;
610   }
611   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
612   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
613   if (zz != yy) {
614     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
615   }
616   PetscLogFlops(4*a->nz);
617   PetscFunctionReturn(0);
618 }
619 
620 #undef __FUNCT__
621 #define __FUNCT__ "MatMultAdd_SeqBAIJ_3"
622 int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
623 {
624   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
625   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,x1,x2,x3;
626   MatScalar       *v;
627   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
628 
629   PetscFunctionBegin;
630   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
631   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
632   if (zz != yy) {
633     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
634   } else {
635     z = y;
636   }
637 
638   idx   = a->j;
639   v     = a->a;
640   ii    = a->i;
641 
642   for (i=0; i<mbs; i++) {
643     n  = ii[1] - ii[0]; ii++;
644     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
645     for (j=0; j<n; j++) {
646       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
647       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
648       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
649       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
650       v += 9;
651     }
652     z[0] = sum1; z[1] = sum2; z[2] = sum3;
653     z += 3; y += 3;
654   }
655   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
656   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
657   if (zz != yy) {
658     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
659   }
660   PetscLogFlops(18*a->nz);
661   PetscFunctionReturn(0);
662 }
663 
664 #undef __FUNCT__
665 #define __FUNCT__ "MatMultAdd_SeqBAIJ_4"
666 int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
667 {
668   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
669   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4;
670   MatScalar       *v;
671   int             ierr,mbs=a->mbs,i,*idx,*ii;
672   int             j,n;
673 
674   PetscFunctionBegin;
675   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
676   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
677   if (zz != yy) {
678     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
679   } else {
680     z = y;
681   }
682 
683   idx   = a->j;
684   v     = a->a;
685   ii    = a->i;
686 
687   for (i=0; i<mbs; i++) {
688     n  = ii[1] - ii[0]; ii++;
689     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
690     for (j=0; j<n; j++) {
691       xb = x + 4*(*idx++);
692       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
693       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
694       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
695       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
696       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
697       v += 16;
698     }
699     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
700     z += 4; y += 4;
701   }
702   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
703   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
704   if (zz != yy) {
705     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
706   }
707   PetscLogFlops(32*a->nz);
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMultAdd_SeqBAIJ_5"
713 int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
714 {
715   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
716   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
717   MatScalar       *v;
718   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
719 
720   PetscFunctionBegin;
721   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
722   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
723   if (zz != yy) {
724     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
725   } else {
726     z = y;
727   }
728 
729   idx   = a->j;
730   v     = a->a;
731   ii    = a->i;
732 
733   for (i=0; i<mbs; i++) {
734     n  = ii[1] - ii[0]; ii++;
735     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
736     for (j=0; j<n; j++) {
737       xb = x + 5*(*idx++);
738       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
739       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
740       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
741       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
742       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
743       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
744       v += 25;
745     }
746     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
747     z += 5; y += 5;
748   }
749   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
750   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
751   if (zz != yy) {
752     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
753   }
754   PetscLogFlops(50*a->nz);
755   PetscFunctionReturn(0);
756 }
757 #undef __FUNCT__
758 #define __FUNCT__ "MatMultAdd_SeqBAIJ_6"
759 int MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
760 {
761   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
762   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
763   PetscScalar     x1,x2,x3,x4,x5,x6;
764   MatScalar       *v;
765   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
766 
767   PetscFunctionBegin;
768   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
769   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
770   if (zz != yy) {
771     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
772   } else {
773     z = y;
774   }
775 
776   idx   = a->j;
777   v     = a->a;
778   ii    = a->i;
779 
780   for (i=0; i<mbs; i++) {
781     n  = ii[1] - ii[0]; ii++;
782     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5];
783     for (j=0; j<n; j++) {
784       xb = x + 6*(*idx++);
785       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
786       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
787       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
788       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
789       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
790       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
791       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
792       v += 36;
793     }
794     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
795     z += 6; y += 6;
796   }
797   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
798   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
799   if (zz != yy) {
800     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
801   }
802   PetscLogFlops(72*a->nz);
803   PetscFunctionReturn(0);
804 }
805 
806 #undef __FUNCT__
807 #define __FUNCT__ "MatMultAdd_SeqBAIJ_7"
808 int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
809 {
810   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
811   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
812   PetscScalar     x1,x2,x3,x4,x5,x6,x7;
813   MatScalar       *v;
814   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
815 
816   PetscFunctionBegin;
817   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
818   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
819   if (zz != yy) {
820     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
821   } else {
822     z = y;
823   }
824 
825   idx   = a->j;
826   v     = a->a;
827   ii    = a->i;
828 
829   for (i=0; i<mbs; i++) {
830     n  = ii[1] - ii[0]; ii++;
831     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
832     for (j=0; j<n; j++) {
833       xb = x + 7*(*idx++);
834       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
835       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
836       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
837       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
838       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
839       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
840       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
841       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
842       v += 49;
843     }
844     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
845     z += 7; y += 7;
846   }
847   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
848   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
849   if (zz != yy) {
850     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
851   }
852   PetscLogFlops(98*a->nz);
853   PetscFunctionReturn(0);
854 }
855 
856 #undef __FUNCT__
857 #define __FUNCT__ "MatMultAdd_SeqBAIJ_N"
858 int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
859 {
860   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
861   PetscScalar    *x,*z,*xb,*work,*workt,*y;
862   MatScalar      *v;
863   int            mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
864   int            ncols,k;
865 
866   PetscFunctionBegin;
867   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
868   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
869   if (zz != yy) {
870     ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
871     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
872     ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
873   }
874 
875   idx   = a->j;
876   v     = a->a;
877   ii    = a->i;
878 
879 
880   if (!a->mult_work) {
881     k    = PetscMax(A->m,A->n);
882     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
883   }
884   work = a->mult_work;
885   for (i=0; i<mbs; i++) {
886     n     = ii[1] - ii[0]; ii++;
887     ncols = n*bs;
888     workt = work;
889     for (j=0; j<n; j++) {
890       xb = x + bs*(*idx++);
891       for (k=0; k<bs; k++) workt[k] = xb[k];
892       workt += bs;
893     }
894     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
895     /* LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */
896     v += n*bs2;
897     z += bs;
898   }
899   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
900   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
901   PetscLogFlops(2*a->nz*bs2);
902   PetscFunctionReturn(0);
903 }
904 
905 #undef __FUNCT__
906 #define __FUNCT__ "MatMultTranspose_SeqBAIJ"
907 int MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz)
908 {
909   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
910   PetscScalar     *xg,*zg,*zb,zero = 0.0;
911   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
912   MatScalar       *v;
913   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval;
914   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
915 
916   PetscFunctionBegin;
917   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
918   ierr = VecGetArray(xx,&xg);CHKERRQ(ierr); x = xg;
919   ierr = VecGetArray(zz,&zg);CHKERRQ(ierr); z = zg;
920 
921   idx   = a->j;
922   v     = a->a;
923   ii    = a->i;
924   xb    = x;
925   switch (bs) {
926   case 1:
927     for (i=0; i<mbs; i++) {
928       n  = ii[1] - ii[0]; ii++;
929       x1 = xb[0];
930       ib = idx + ai[i];
931       for (j=0; j<n; j++) {
932         rval    = ib[j];
933         z[rval] += *v * x1;
934         v++;
935       }
936       xb++;
937     }
938     break;
939   case 2:
940     for (i=0; i<mbs; i++) {
941       n  = ii[1] - ii[0]; ii++;
942       x1 = xb[0]; x2 = xb[1];
943       ib = idx + ai[i];
944       for (j=0; j<n; j++) {
945         rval      = ib[j]*2;
946         z[rval++] += v[0]*x1 + v[1]*x2;
947         z[rval]   += v[2]*x1 + v[3]*x2;
948         v  += 4;
949       }
950       xb += 2;
951     }
952     break;
953   case 3:
954     for (i=0; i<mbs; i++) {
955       n  = ii[1] - ii[0]; ii++;
956       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
957       ib = idx + ai[i];
958       for (j=0; j<n; j++) {
959         rval      = ib[j]*3;
960         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
961         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
962         z[rval]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
963         v  += 9;
964       }
965       xb += 3;
966     }
967     break;
968   case 4:
969     for (i=0; i<mbs; i++) {
970       n  = ii[1] - ii[0]; ii++;
971       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
972       ib = idx + ai[i];
973       for (j=0; j<n; j++) {
974         rval      = ib[j]*4;
975         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
976         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
977         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
978         z[rval]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
979         v  += 16;
980       }
981       xb += 4;
982     }
983     break;
984   case 5:
985     for (i=0; i<mbs; i++) {
986       n  = ii[1] - ii[0]; ii++;
987       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
988       x4 = xb[3]; x5 = xb[4];
989       ib = idx + ai[i];
990       for (j=0; j<n; j++) {
991         rval      = ib[j]*5;
992         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
993         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
994         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
995         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
996         z[rval]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
997         v  += 25;
998       }
999       xb += 5;
1000     }
1001     break;
1002   case 6:
1003     for (i=0; i<mbs; i++) {
1004       n  = ii[1] - ii[0]; ii++;
1005       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1006       x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
1007       ib = idx + ai[i];
1008       for (j=0; j<n; j++) {
1009         rval      = ib[j]*6;
1010         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 + v[4]*x5 + v[5]*x6;
1011         z[rval++] +=  v[6]*x1 +  v[7]*x2 +  v[8]*x3 +  v[9]*x4 + v[10]*x5 + v[11]*x6;
1012         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6;
1013         z[rval++] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6;
1014         z[rval++] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6;
1015         z[rval]   += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6;
1016         v  += 36;
1017       }
1018       xb += 6;
1019     }
1020     break;
1021   case 7:
1022     for (i=0; i<mbs; i++) {
1023       n  = ii[1] - ii[0]; ii++;
1024       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1025       x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1026       ib = idx + ai[i];
1027       for (j=0; j<n; j++) {
1028         rval      = ib[j]*7;
1029         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 + v[4]*x5 + v[5]*x6 + v[6]*x7;
1030         z[rval++] +=  v[7]*x1 +  v[8]*x2 +  v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7;
1031         z[rval++] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7;
1032         z[rval++] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7;
1033         z[rval++] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7;
1034         z[rval++] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7;
1035         z[rval]   += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7;
1036         v  += 49;
1037       }
1038       xb += 7;
1039     }
1040     break;
1041   default: {       /* block sizes larger then 7 by 7 are handled by BLAS */
1042       int          ncols,k;
1043       PetscScalar  *work,*workt;
1044 
1045       if (!a->mult_work) {
1046         k = PetscMax(A->m,A->n);
1047         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1048       }
1049       work = a->mult_work;
1050       for (i=0; i<mbs; i++) {
1051         n     = ii[1] - ii[0]; ii++;
1052         ncols = n*bs;
1053         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1054         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,work);
1055         /* LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); */
1056         v += n*bs2;
1057         x += bs;
1058         workt = work;
1059         for (j=0; j<n; j++) {
1060           zb = z + bs*(*idx++);
1061           for (k=0; k<bs; k++) zb[k] += workt[k] ;
1062           workt += bs;
1063         }
1064       }
1065     }
1066   }
1067   ierr = VecRestoreArray(xx,&xg);CHKERRQ(ierr);
1068   ierr = VecRestoreArray(zz,&zg);CHKERRQ(ierr);
1069   PetscLogFlops(2*a->nz*a->bs2 - A->n);
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ"
1075 int MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1076 
1077 {
1078   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1079   PetscScalar     *xg,*zg,*zb,*x,*z,*xb,x1,x2,x3,x4,x5;
1080   MatScalar       *v;
1081   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1082 
1083   PetscFunctionBegin;
1084   if (yy != zz) { ierr = VecCopy(yy,zz);CHKERRQ(ierr); }
1085   ierr = VecGetArray(xx,&xg);CHKERRQ(ierr); x = xg;
1086   ierr = VecGetArray(zz,&zg);CHKERRQ(ierr); z = zg;
1087 
1088 
1089   idx   = a->j;
1090   v     = a->a;
1091   ii    = a->i;
1092   xb    = x;
1093 
1094   switch (bs) {
1095   case 1:
1096     for (i=0; i<mbs; i++) {
1097       n  = ii[1] - ii[0]; ii++;
1098       x1 = xb[0];
1099       ib = idx + ai[i];
1100       for (j=0; j<n; j++) {
1101         rval    = ib[j];
1102         z[rval] += *v * x1;
1103         v++;
1104       }
1105       xb++;
1106     }
1107     break;
1108   case 2:
1109     for (i=0; i<mbs; i++) {
1110       n  = ii[1] - ii[0]; ii++;
1111       x1 = xb[0]; x2 = xb[1];
1112       ib = idx + ai[i];
1113       for (j=0; j<n; j++) {
1114         rval      = ib[j]*2;
1115         z[rval++] += v[0]*x1 + v[1]*x2;
1116         z[rval++] += v[2]*x1 + v[3]*x2;
1117         v  += 4;
1118       }
1119       xb += 2;
1120     }
1121     break;
1122   case 3:
1123     for (i=0; i<mbs; i++) {
1124       n  = ii[1] - ii[0]; ii++;
1125       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1126       ib = idx + ai[i];
1127       for (j=0; j<n; j++) {
1128         rval      = ib[j]*3;
1129         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1130         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1131         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1132         v  += 9;
1133       }
1134       xb += 3;
1135     }
1136     break;
1137   case 4:
1138     for (i=0; i<mbs; i++) {
1139       n  = ii[1] - ii[0]; ii++;
1140       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1141       ib = idx + ai[i];
1142       for (j=0; j<n; j++) {
1143         rval      = ib[j]*4;
1144         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1145         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1146         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1147         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1148         v  += 16;
1149       }
1150       xb += 4;
1151     }
1152     break;
1153   case 5:
1154     for (i=0; i<mbs; i++) {
1155       n  = ii[1] - ii[0]; ii++;
1156       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1157       x4 = xb[3]; x5 = xb[4];
1158       ib = idx + ai[i];
1159       for (j=0; j<n; j++) {
1160         rval      = ib[j]*5;
1161         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1162         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1163         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1164         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1165         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1166         v  += 25;
1167       }
1168       xb += 5;
1169     }
1170     break;
1171   default: {      /* block sizes larger then 5 by 5 are handled by BLAS */
1172       int          ncols,k;
1173       PetscScalar  *work,*workt;
1174 
1175       if (!a->mult_work) {
1176         k = PetscMax(A->m,A->n);
1177         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1178       }
1179       work = a->mult_work;
1180       for (i=0; i<mbs; i++) {
1181         n     = ii[1] - ii[0]; ii++;
1182         ncols = n*bs;
1183         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1184         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,work);
1185         /* LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); */
1186         v += n*bs2;
1187         x += bs;
1188         workt = work;
1189         for (j=0; j<n; j++) {
1190           zb = z + bs*(*idx++);
1191           for (k=0; k<bs; k++) zb[k] += workt[k] ;
1192           workt += bs;
1193         }
1194       }
1195     }
1196   }
1197   ierr = VecRestoreArray(xx,&xg);CHKERRQ(ierr);
1198   ierr = VecRestoreArray(zz,&zg);CHKERRQ(ierr);
1199   PetscLogFlops(2*a->nz*a->bs2);
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 #undef __FUNCT__
1204 #define __FUNCT__ "MatScale_SeqBAIJ"
1205 int MatScale_SeqBAIJ(PetscScalar *alpha,Mat inA)
1206 {
1207   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1208   int         totalnz = a->bs2*a->nz;
1209 #if defined(PETSC_USE_MAT_SINGLE)
1210   int         i;
1211 #else
1212   int         one = 1;
1213 #endif
1214 
1215   PetscFunctionBegin;
1216 #if defined(PETSC_USE_MAT_SINGLE)
1217   for (i=0; i<totalnz; i++) a->a[i] *= *alpha;
1218 #else
1219   BLscal_(&totalnz,alpha,a->a,&one);
1220 #endif
1221   PetscLogFlops(totalnz);
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 #undef __FUNCT__
1226 #define __FUNCT__ "MatNorm_SeqBAIJ"
1227 int MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm)
1228 {
1229   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1230   MatScalar   *v = a->a;
1231   PetscReal   sum = 0.0;
1232   int         i,j,k,bs = a->bs,nz=a->nz,bs2=a->bs2,k1;
1233 
1234   PetscFunctionBegin;
1235   if (type == NORM_FROBENIUS) {
1236     for (i=0; i< bs2*nz; i++) {
1237 #if defined(PETSC_USE_COMPLEX)
1238       sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1239 #else
1240       sum += (*v)*(*v); v++;
1241 #endif
1242     }
1243     *norm = sqrt(sum);
1244   }  else if (type == NORM_INFINITY) { /* maximum row sum */
1245     *norm = 0.0;
1246     for (k=0; k<bs; k++) {
1247       for (j=0; j<a->mbs; j++) {
1248         v = a->a + bs2*a->i[j] + k;
1249         sum = 0.0;
1250         for (i=0; i<a->i[j+1]-a->i[j]; i++) {
1251           for (k1=0; k1<bs; k1++){
1252             sum += PetscAbsScalar(*v);
1253             v   += bs;
1254           }
1255         }
1256         if (sum > *norm) *norm = sum;
1257       }
1258     }
1259   } else {
1260     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1261   }
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatEqual_SeqBAIJ"
1268 int MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg)
1269 {
1270   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data;
1271   int         ierr;
1272   PetscTruth  flag;
1273 
1274   PetscFunctionBegin;
1275   ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flag);CHKERRQ(ierr);
1276   if (!flag) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1277 
1278   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1279   if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->nz != b->nz)) {
1280     *flg = PETSC_FALSE;
1281     PetscFunctionReturn(0);
1282   }
1283 
1284   /* if the a->i are the same */
1285   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr);
1286   if (*flg == PETSC_FALSE) {
1287     PetscFunctionReturn(0);
1288   }
1289 
1290   /* if a->j are the same */
1291   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int),flg);CHKERRQ(ierr);
1292   if (*flg == PETSC_FALSE) {
1293     PetscFunctionReturn(0);
1294   }
1295   /* if a->a are the same */
1296   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1297   PetscFunctionReturn(0);
1298 
1299 }
1300 
1301 #undef __FUNCT__
1302 #define __FUNCT__ "MatGetDiagonal_SeqBAIJ"
1303 int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1304 {
1305   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1306   int          ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1307   PetscScalar  *x,zero = 0.0;
1308   MatScalar    *aa,*aa_j;
1309 
1310   PetscFunctionBegin;
1311   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1312   bs   = a->bs;
1313   aa   = a->a;
1314   ai   = a->i;
1315   aj   = a->j;
1316   ambs = a->mbs;
1317   bs2  = a->bs2;
1318 
1319   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1320   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1321   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1322   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1323   for (i=0; i<ambs; i++) {
1324     for (j=ai[i]; j<ai[i+1]; j++) {
1325       if (aj[j] == i) {
1326         row  = i*bs;
1327         aa_j = aa+j*bs2;
1328         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1329         break;
1330       }
1331     }
1332   }
1333   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatDiagonalScale_SeqBAIJ"
1339 int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1340 {
1341   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1342   PetscScalar  *l,*r,x,*li,*ri;
1343   MatScalar    *aa,*v;
1344   int          ierr,i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1345 
1346   PetscFunctionBegin;
1347   ai  = a->i;
1348   aj  = a->j;
1349   aa  = a->a;
1350   m   = A->m;
1351   n   = A->n;
1352   bs  = a->bs;
1353   mbs = a->mbs;
1354   bs2 = a->bs2;
1355   if (ll) {
1356     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
1357     ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1358     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1359     for (i=0; i<mbs; i++) { /* for each block row */
1360       M  = ai[i+1] - ai[i];
1361       li = l + i*bs;
1362       v  = aa + bs2*ai[i];
1363       for (j=0; j<M; j++) { /* for each block */
1364         for (k=0; k<bs2; k++) {
1365           (*v++) *= li[k%bs];
1366         }
1367       }
1368     }
1369     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1370     PetscLogFlops(a->nz);
1371   }
1372 
1373   if (rr) {
1374     ierr = VecGetArray(rr,&r);CHKERRQ(ierr);
1375     ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr);
1376     if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
1377     for (i=0; i<mbs; i++) { /* for each block row */
1378       M  = ai[i+1] - ai[i];
1379       v  = aa + bs2*ai[i];
1380       for (j=0; j<M; j++) { /* for each block */
1381         ri = r + bs*aj[ai[i]+j];
1382         for (k=0; k<bs; k++) {
1383           x = ri[k];
1384           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
1385         }
1386       }
1387     }
1388     ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr);
1389     PetscLogFlops(a->nz);
1390   }
1391   PetscFunctionReturn(0);
1392 }
1393 
1394 
1395 #undef __FUNCT__
1396 #define __FUNCT__ "MatGetInfo_SeqBAIJ"
1397 int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1398 {
1399   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1400 
1401   PetscFunctionBegin;
1402   info->rows_global    = (double)A->m;
1403   info->columns_global = (double)A->n;
1404   info->rows_local     = (double)A->m;
1405   info->columns_local  = (double)A->n;
1406   info->block_size     = a->bs2;
1407   info->nz_allocated   = a->maxnz;
1408   info->nz_used        = a->bs2*a->nz;
1409   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1410   info->assemblies   = A->num_ass;
1411   info->mallocs      = a->reallocs;
1412   info->memory       = A->mem;
1413   if (A->factor) {
1414     info->fill_ratio_given  = A->info.fill_ratio_given;
1415     info->fill_ratio_needed = A->info.fill_ratio_needed;
1416     info->factor_mallocs    = A->info.factor_mallocs;
1417   } else {
1418     info->fill_ratio_given  = 0;
1419     info->fill_ratio_needed = 0;
1420     info->factor_mallocs    = 0;
1421   }
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 
1426 #undef __FUNCT__
1427 #define __FUNCT__ "MatZeroEntries_SeqBAIJ"
1428 int MatZeroEntries_SeqBAIJ(Mat A)
1429 {
1430   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1431   int         ierr;
1432 
1433   PetscFunctionBegin;
1434   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
1435   PetscFunctionReturn(0);
1436 }
1437