xref: /petsc/src/mat/impls/baij/seq/baij2.c (revision 6849ba73f22fecb8f92ef896a42e4e8bd4cd6965)
1 
2 #include "src/mat/impls/baij/seq/baij.h"
3 #include "src/inline/spops.h"
4 #include "src/inline/ilu.h"
5 #include "petscbt.h"
6 
7 #undef __FUNCT__
8 #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ"
9 PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,int is_max,IS is[],int ov)
10 {
11   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
12   PetscErrorCode ierr;
13   int         row,i,j,k,l,m,n,*idx,*nidx,isz,val,ival;
14   int         start,end,*ai,*aj,bs,*nidx2;
15   PetscBT     table;
16 
17   PetscFunctionBegin;
18   m     = a->mbs;
19   ai    = a->i;
20   aj    = a->j;
21   bs    = a->bs;
22 
23   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
24 
25   ierr = PetscBTCreate(m,table);CHKERRQ(ierr);
26   ierr = PetscMalloc((m+1)*sizeof(int),&nidx);CHKERRQ(ierr);
27   ierr = PetscMalloc((A->m+1)*sizeof(int),&nidx2);CHKERRQ(ierr);
28 
29   for (i=0; i<is_max; i++) {
30     /* Initialise the two local arrays */
31     isz  = 0;
32     ierr = PetscBTMemzero(m,table);CHKERRQ(ierr);
33 
34     /* Extract the indices, assume there can be duplicate entries */
35     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
36     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
37 
38     /* Enter these into the temp arrays i.e mark table[row], enter row into new index */
39     for (j=0; j<n ; ++j){
40       ival = idx[j]/bs; /* convert the indices into block indices */
41       if (ival>=m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
42       if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;}
43     }
44     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
45     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
46 
47     k = 0;
48     for (j=0; j<ov; j++){ /* for each overlap*/
49       n = isz;
50       for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */
51         row   = nidx[k];
52         start = ai[row];
53         end   = ai[row+1];
54         for (l = start; l<end ; l++){
55           val = aj[l];
56           if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;}
57         }
58       }
59     }
60     /* expand the Index Set */
61     for (j=0; j<isz; j++) {
62       for (k=0; k<bs; k++)
63         nidx2[j*bs+k] = nidx[j]*bs+k;
64     }
65     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
66   }
67   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
68   ierr = PetscFree(nidx);CHKERRQ(ierr);
69   ierr = PetscFree(nidx2);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 
73 #undef __FUNCT__
74 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private"
75 PetscErrorCode MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
76 {
77   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data,*c;
78   PetscErrorCode ierr;
79   int          *smap,i,k,kstart,kend,oldcols = a->nbs,*lens;
80   int          row,mat_i,*mat_j,tcol,*mat_ilen;
81   int          *irow,*icol,nrows,ncols,*ssmap,bs=a->bs,bs2=a->bs2;
82   int          *aj = a->j,*ai = a->i;
83   MatScalar    *mat_a;
84   Mat          C;
85   PetscTruth   flag;
86 
87   PetscFunctionBegin;
88   ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr);
89   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
90 
91   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
92   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
93   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
94   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
95 
96   ierr = PetscMalloc((1+oldcols)*sizeof(int),&smap);CHKERRQ(ierr);
97   ssmap = smap;
98   ierr = PetscMalloc((1+nrows)*sizeof(int),&lens);CHKERRQ(ierr);
99   ierr  = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr);
100   for (i=0; i<ncols; i++) smap[icol[i]] = i+1;
101   /* determine lens of each row */
102   for (i=0; i<nrows; i++) {
103     kstart  = ai[irow[i]];
104     kend    = kstart + a->ilen[irow[i]];
105     lens[i] = 0;
106       for (k=kstart; k<kend; k++) {
107         if (ssmap[aj[k]]) {
108           lens[i]++;
109         }
110       }
111     }
112   /* Create and fill new matrix */
113   if (scall == MAT_REUSE_MATRIX) {
114     c = (Mat_SeqBAIJ *)((*B)->data);
115 
116     if (c->mbs!=nrows || c->nbs!=ncols || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
117     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr);
118     if (flag == PETSC_FALSE) {
119       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
120     }
121     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr);
122     C = *B;
123   } else {
124     ierr = MatCreate(A->comm,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE,&C);CHKERRQ(ierr);
125     ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
126     ierr = MatSeqBAIJSetPreallocation(C,bs,0,lens);CHKERRQ(ierr);
127   }
128   c = (Mat_SeqBAIJ *)(C->data);
129   for (i=0; i<nrows; i++) {
130     row    = irow[i];
131     kstart = ai[row];
132     kend   = kstart + a->ilen[row];
133     mat_i  = c->i[i];
134     mat_j  = c->j + mat_i;
135     mat_a  = c->a + mat_i*bs2;
136     mat_ilen = c->ilen + i;
137     for (k=kstart; k<kend; k++) {
138       if ((tcol=ssmap[a->j[k]])) {
139         *mat_j++ = tcol - 1;
140         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
141         mat_a   += bs2;
142         (*mat_ilen)++;
143       }
144     }
145   }
146 
147   /* Free work space */
148   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
149   ierr = PetscFree(smap);CHKERRQ(ierr);
150   ierr = PetscFree(lens);CHKERRQ(ierr);
151   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
152   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
153 
154   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
155   *B = C;
156   PetscFunctionReturn(0);
157 }
158 
159 #undef __FUNCT__
160 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ"
161 PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
162 {
163   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
164   IS          is1,is2;
165   PetscErrorCode ierr;
166   int         *vary,*iary,*irow,*icol,nrows,ncols,i,bs=a->bs,count;
167 
168   PetscFunctionBegin;
169   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
170   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
171   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
172   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
173 
174   /* Verify if the indices corespond to each element in a block
175    and form the IS with compressed IS */
176   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);CHKERRQ(ierr);
177   iary = vary + a->mbs;
178   ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr);
179   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
180   count = 0;
181   for (i=0; i<a->mbs; i++) {
182     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Index set does not match blocks");
183     if (vary[i]==bs) iary[count++] = i;
184   }
185   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
186 
187   ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr);
188   for (i=0; i<ncols; i++) vary[icol[i]/bs]++;
189   count = 0;
190   for (i=0; i<a->mbs; i++) {
191     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_PLIB,"Internal error in PETSc");
192     if (vary[i]==bs) iary[count++] = i;
193   }
194   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr);
195   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
196   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
197   ierr = PetscFree(vary);CHKERRQ(ierr);
198 
199   ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,cs,scall,B);CHKERRQ(ierr);
200   ISDestroy(is1);
201   ISDestroy(is2);
202   PetscFunctionReturn(0);
203 }
204 
205 #undef __FUNCT__
206 #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ"
207 PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
208 {
209   PetscErrorCode ierr;
210   int i;
211 
212   PetscFunctionBegin;
213   if (scall == MAT_INITIAL_MATRIX) {
214     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
215   }
216 
217   for (i=0; i<n; i++) {
218     ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr);
219   }
220   PetscFunctionReturn(0);
221 }
222 
223 
224 /* -------------------------------------------------------*/
225 /* Should check that shapes of vectors and matrices match */
226 /* -------------------------------------------------------*/
227 #include "petscblaslapack.h"
228 
229 #undef __FUNCT__
230 #define __FUNCT__ "MatMult_SeqBAIJ_1"
231 PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
232 {
233   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
234   PetscScalar     *x,*z,sum;
235   MatScalar       *v;
236   PetscErrorCode ierr;
237   int             mbs=a->mbs,i,*idx,*ii,n;
238 
239   PetscFunctionBegin;
240   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
241   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
242 
243   idx   = a->j;
244   v     = a->a;
245   ii    = a->i;
246 
247   for (i=0; i<mbs; i++) {
248     n    = ii[1] - ii[0]; ii++;
249     sum  = 0.0;
250     while (n--) sum += *v++ * x[*idx++];
251     z[i] = sum;
252   }
253   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
254   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
255   PetscLogFlops(2*a->nz - A->m);
256   PetscFunctionReturn(0);
257 }
258 
259 #undef __FUNCT__
260 #define __FUNCT__ "MatMult_SeqBAIJ_2"
261 PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
262 {
263   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
264   PetscScalar     *x,*z,*xb,sum1,sum2;
265   PetscScalar     x1,x2;
266   MatScalar       *v;
267   PetscErrorCode ierr;
268   int mbs=a->mbs,i,*idx,*ii,j,n;
269 
270   PetscFunctionBegin;
271   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
272   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
273 
274   idx   = a->j;
275   v     = a->a;
276   ii    = a->i;
277 
278   for (i=0; i<mbs; i++) {
279     n  = ii[1] - ii[0]; ii++;
280     sum1 = 0.0; sum2 = 0.0;
281     for (j=0; j<n; j++) {
282       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
283       sum1 += v[0]*x1 + v[2]*x2;
284       sum2 += v[1]*x1 + v[3]*x2;
285       v += 4;
286     }
287     z[0] = sum1; z[1] = sum2;
288     z += 2;
289   }
290   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
291   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
292   PetscLogFlops(8*a->nz - A->m);
293   PetscFunctionReturn(0);
294 }
295 
296 #undef __FUNCT__
297 #define __FUNCT__ "MatMult_SeqBAIJ_3"
298 PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
299 {
300   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
301   PetscScalar  *x,*z,*xb,sum1,sum2,sum3,x1,x2,x3;
302   MatScalar    *v;
303   PetscErrorCode ierr;
304   int  mbs=a->mbs,i,*idx,*ii,j,n;
305 
306 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
307 #pragma disjoint(*v,*z,*xb)
308 #endif
309 
310   PetscFunctionBegin;
311   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
312   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
313 
314   idx   = a->j;
315   v     = a->a;
316   ii    = a->i;
317 
318   for (i=0; i<mbs; i++) {
319     n  = ii[1] - ii[0]; ii++;
320     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
321     for (j=0; j<n; j++) {
322       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
323       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
324       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
325       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
326       v += 9;
327     }
328     z[0] = sum1; z[1] = sum2; z[2] = sum3;
329     z += 3;
330   }
331   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
332   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
333   PetscLogFlops(18*a->nz - A->m);
334   PetscFunctionReturn(0);
335 }
336 
337 #undef __FUNCT__
338 #define __FUNCT__ "MatMult_SeqBAIJ_4"
339 PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
340 {
341   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
342   PetscScalar     *x,*z,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4;
343   MatScalar       *v;
344   PetscErrorCode ierr;
345   int mbs=a->mbs,i,*idx,*ii,j,n;
346 
347   PetscFunctionBegin;
348   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
349   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
350 
351   idx   = a->j;
352   v     = a->a;
353   ii    = a->i;
354 
355   for (i=0; i<mbs; i++) {
356     n  = ii[1] - ii[0]; ii++;
357     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
358     for (j=0; j<n; j++) {
359       xb = x + 4*(*idx++);
360       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
361       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
362       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
363       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
364       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
365       v += 16;
366     }
367     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
368     z += 4;
369   }
370   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
371   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
372   PetscLogFlops(32*a->nz - A->m);
373   PetscFunctionReturn(0);
374 }
375 
376 #undef __FUNCT__
377 #define __FUNCT__ "MatMult_SeqBAIJ_5"
378 PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
379 {
380   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
381   PetscScalar     sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*xb,*z,*x;
382   MatScalar       *v;
383   PetscErrorCode ierr;
384   int mbs=a->mbs,i,*idx,*ii,j,n;
385 
386   PetscFunctionBegin;
387   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
388   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
389 
390   idx   = a->j;
391   v     = a->a;
392   ii    = a->i;
393 
394   for (i=0; i<mbs; i++) {
395     n  = ii[1] - ii[0]; ii++;
396     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
397     for (j=0; j<n; j++) {
398       xb = x + 5*(*idx++);
399       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
400       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
401       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
402       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
403       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
404       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
405       v += 25;
406     }
407     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
408     z += 5;
409   }
410   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
411   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
412   PetscLogFlops(50*a->nz - A->m);
413   PetscFunctionReturn(0);
414 }
415 
416 
417 #undef __FUNCT__
418 #define __FUNCT__ "MatMult_SeqBAIJ_6"
419 PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz)
420 {
421   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
422   PetscScalar     *x,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
423   PetscScalar     x1,x2,x3,x4,x5,x6;
424   MatScalar       *v;
425   PetscErrorCode ierr;
426   int   mbs=a->mbs,i,*idx,*ii,j,n;
427 
428   PetscFunctionBegin;
429   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
430   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
431 
432   idx   = a->j;
433   v     = a->a;
434   ii    = a->i;
435 
436   for (i=0; i<mbs; i++) {
437     n  = ii[1] - ii[0]; ii++;
438     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0;
439     for (j=0; j<n; j++) {
440       xb = x + 6*(*idx++);
441       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
442       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
443       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
444       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
445       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
446       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
447       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
448       v += 36;
449     }
450     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
451     z += 6;
452   }
453 
454   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
455   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
456   PetscLogFlops(72*a->nz - A->m);
457   PetscFunctionReturn(0);
458 }
459 #undef __FUNCT__
460 #define __FUNCT__ "MatMult_SeqBAIJ_7"
461 PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
462 {
463   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
464   PetscScalar     *x,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
465   PetscScalar     x1,x2,x3,x4,x5,x6,x7;
466   MatScalar       *v;
467   PetscErrorCode ierr;
468   int mbs=a->mbs,i,*idx,*ii,j,n;
469 
470   PetscFunctionBegin;
471   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
472   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
473 
474   idx   = a->j;
475   v     = a->a;
476   ii    = a->i;
477 
478   for (i=0; i<mbs; i++) {
479     n  = ii[1] - ii[0]; ii++;
480     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
481     for (j=0; j<n; j++) {
482       xb = x + 7*(*idx++);
483       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
484       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
485       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
486       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
487       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
488       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
489       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
490       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
491       v += 49;
492     }
493     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
494     z += 7;
495   }
496 
497   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
498   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
499   PetscLogFlops(98*a->nz - A->m);
500   PetscFunctionReturn(0);
501 }
502 
503 /*
504     This will not work with MatScalar == float because it calls the BLAS
505 */
506 #undef __FUNCT__
507 #define __FUNCT__ "MatMult_SeqBAIJ_N"
508 PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
509 {
510   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
511   PetscScalar     *x,*z,*xb,*work,*workt;
512   MatScalar       *v;
513   PetscErrorCode ierr;
514   int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
515   int             ncols,k;
516 
517   PetscFunctionBegin;
518   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
519   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
520 
521   idx   = a->j;
522   v     = a->a;
523   ii    = a->i;
524 
525 
526   if (!a->mult_work) {
527     k    = PetscMax(A->m,A->n);
528     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
529   }
530   work = a->mult_work;
531   for (i=0; i<mbs; i++) {
532     n     = ii[1] - ii[0]; ii++;
533     ncols = n*bs;
534     workt = work;
535     for (j=0; j<n; j++) {
536       xb = x + bs*(*idx++);
537       for (k=0; k<bs; k++) workt[k] = xb[k];
538       workt += bs;
539     }
540     Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z);
541     /* LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */
542     v += n*bs2;
543     z += bs;
544   }
545   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
546   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
547   PetscLogFlops(2*a->nz*bs2 - A->m);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatMultAdd_SeqBAIJ_1"
553 PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
554 {
555   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
556   PetscScalar     *x,*y,*z,sum;
557   MatScalar       *v;
558   PetscErrorCode ierr;
559   int mbs=a->mbs,i,*idx,*ii,n;
560 
561   PetscFunctionBegin;
562   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
563   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
564   if (zz != yy) {
565     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
566   } else {
567     z = y;
568   }
569 
570   idx   = a->j;
571   v     = a->a;
572   ii    = a->i;
573 
574   for (i=0; i<mbs; i++) {
575     n    = ii[1] - ii[0]; ii++;
576     sum  = y[i];
577     while (n--) sum += *v++ * x[*idx++];
578     z[i] = sum;
579   }
580   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
581   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
582   if (zz != yy) {
583     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
584   }
585   PetscLogFlops(2*a->nz);
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatMultAdd_SeqBAIJ_2"
591 PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
592 {
593   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
594   PetscScalar     *x,*y,*z,*xb,sum1,sum2;
595   PetscScalar     x1,x2;
596   MatScalar       *v;
597   PetscErrorCode ierr;
598   int mbs=a->mbs,i,*idx,*ii,j,n;
599 
600   PetscFunctionBegin;
601   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
602   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
603   if (zz != yy) {
604     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
605   } else {
606     z = y;
607   }
608 
609   idx   = a->j;
610   v     = a->a;
611   ii    = a->i;
612 
613   for (i=0; i<mbs; i++) {
614     n  = ii[1] - ii[0]; ii++;
615     sum1 = y[0]; sum2 = y[1];
616     for (j=0; j<n; j++) {
617       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
618       sum1 += v[0]*x1 + v[2]*x2;
619       sum2 += v[1]*x1 + v[3]*x2;
620       v += 4;
621     }
622     z[0] = sum1; z[1] = sum2;
623     z += 2; y += 2;
624   }
625   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
626   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
627   if (zz != yy) {
628     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
629   }
630   PetscLogFlops(4*a->nz);
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatMultAdd_SeqBAIJ_3"
636 PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
637 {
638   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
639   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,x1,x2,x3;
640   MatScalar       *v;
641   PetscErrorCode ierr;
642   int mbs=a->mbs,i,*idx,*ii,j,n;
643 
644   PetscFunctionBegin;
645   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
646   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
647   if (zz != yy) {
648     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
649   } else {
650     z = y;
651   }
652 
653   idx   = a->j;
654   v     = a->a;
655   ii    = a->i;
656 
657   for (i=0; i<mbs; i++) {
658     n  = ii[1] - ii[0]; ii++;
659     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
660     for (j=0; j<n; j++) {
661       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
662       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
663       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
664       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
665       v += 9;
666     }
667     z[0] = sum1; z[1] = sum2; z[2] = sum3;
668     z += 3; y += 3;
669   }
670   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
671   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
672   if (zz != yy) {
673     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
674   }
675   PetscLogFlops(18*a->nz);
676   PetscFunctionReturn(0);
677 }
678 
679 #undef __FUNCT__
680 #define __FUNCT__ "MatMultAdd_SeqBAIJ_4"
681 PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
682 {
683   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
684   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4;
685   MatScalar       *v;
686   PetscErrorCode ierr;
687   int  mbs=a->mbs,i,*idx,*ii;
688   int             j,n;
689 
690   PetscFunctionBegin;
691   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
692   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
693   if (zz != yy) {
694     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
695   } else {
696     z = y;
697   }
698 
699   idx   = a->j;
700   v     = a->a;
701   ii    = a->i;
702 
703   for (i=0; i<mbs; i++) {
704     n  = ii[1] - ii[0]; ii++;
705     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
706     for (j=0; j<n; j++) {
707       xb = x + 4*(*idx++);
708       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
709       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
710       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
711       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
712       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
713       v += 16;
714     }
715     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
716     z += 4; y += 4;
717   }
718   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
719   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
720   if (zz != yy) {
721     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
722   }
723   PetscLogFlops(32*a->nz);
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatMultAdd_SeqBAIJ_5"
729 PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
730 {
731   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
732   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
733   MatScalar       *v;
734   PetscErrorCode ierr;
735   int mbs=a->mbs,i,*idx,*ii,j,n;
736 
737   PetscFunctionBegin;
738   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
739   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
740   if (zz != yy) {
741     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
742   } else {
743     z = y;
744   }
745 
746   idx   = a->j;
747   v     = a->a;
748   ii    = a->i;
749 
750   for (i=0; i<mbs; i++) {
751     n  = ii[1] - ii[0]; ii++;
752     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
753     for (j=0; j<n; j++) {
754       xb = x + 5*(*idx++);
755       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
756       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
757       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
758       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
759       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
760       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
761       v += 25;
762     }
763     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
764     z += 5; y += 5;
765   }
766   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
767   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
768   if (zz != yy) {
769     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
770   }
771   PetscLogFlops(50*a->nz);
772   PetscFunctionReturn(0);
773 }
774 #undef __FUNCT__
775 #define __FUNCT__ "MatMultAdd_SeqBAIJ_6"
776 PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
777 {
778   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
779   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
780   PetscScalar     x1,x2,x3,x4,x5,x6;
781   MatScalar       *v;
782   PetscErrorCode ierr;
783   int mbs=a->mbs,i,*idx,*ii,j,n;
784 
785   PetscFunctionBegin;
786   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
787   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
788   if (zz != yy) {
789     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
790   } else {
791     z = y;
792   }
793 
794   idx   = a->j;
795   v     = a->a;
796   ii    = a->i;
797 
798   for (i=0; i<mbs; i++) {
799     n  = ii[1] - ii[0]; ii++;
800     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5];
801     for (j=0; j<n; j++) {
802       xb = x + 6*(*idx++);
803       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
804       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
805       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
806       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
807       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
808       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
809       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
810       v += 36;
811     }
812     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
813     z += 6; y += 6;
814   }
815   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
816   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
817   if (zz != yy) {
818     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
819   }
820   PetscLogFlops(72*a->nz);
821   PetscFunctionReturn(0);
822 }
823 
824 #undef __FUNCT__
825 #define __FUNCT__ "MatMultAdd_SeqBAIJ_7"
826 PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
827 {
828   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
829   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
830   PetscScalar     x1,x2,x3,x4,x5,x6,x7;
831   MatScalar       *v;
832   PetscErrorCode ierr;
833   int mbs=a->mbs,i,*idx,*ii,j,n;
834 
835   PetscFunctionBegin;
836   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
837   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
838   if (zz != yy) {
839     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
840   } else {
841     z = y;
842   }
843 
844   idx   = a->j;
845   v     = a->a;
846   ii    = a->i;
847 
848   for (i=0; i<mbs; i++) {
849     n  = ii[1] - ii[0]; ii++;
850     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
851     for (j=0; j<n; j++) {
852       xb = x + 7*(*idx++);
853       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
854       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
855       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
856       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
857       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
858       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
859       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
860       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
861       v += 49;
862     }
863     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
864     z += 7; y += 7;
865   }
866   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
867   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
868   if (zz != yy) {
869     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
870   }
871   PetscLogFlops(98*a->nz);
872   PetscFunctionReturn(0);
873 }
874 
875 #undef __FUNCT__
876 #define __FUNCT__ "MatMultAdd_SeqBAIJ_N"
877 PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
878 {
879   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
880   PetscScalar    *x,*z,*xb,*work,*workt,*y;
881   MatScalar      *v;
882   PetscErrorCode ierr;
883   int            mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
884   int            ncols,k;
885 
886   PetscFunctionBegin;
887   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
888   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
889   if (zz != yy) {
890     ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
891     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
892     ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
893   }
894 
895   idx   = a->j;
896   v     = a->a;
897   ii    = a->i;
898 
899 
900   if (!a->mult_work) {
901     k    = PetscMax(A->m,A->n);
902     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
903   }
904   work = a->mult_work;
905   for (i=0; i<mbs; i++) {
906     n     = ii[1] - ii[0]; ii++;
907     ncols = n*bs;
908     workt = work;
909     for (j=0; j<n; j++) {
910       xb = x + bs*(*idx++);
911       for (k=0; k<bs; k++) workt[k] = xb[k];
912       workt += bs;
913     }
914     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
915     /* LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */
916     v += n*bs2;
917     z += bs;
918   }
919   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
920   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
921   PetscLogFlops(2*a->nz*bs2);
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "MatMultTranspose_SeqBAIJ"
927 PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz)
928 {
929   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
930   PetscScalar     *xg,*zg,*zb,zero = 0.0;
931   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
932   MatScalar       *v;
933   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval;
934   PetscErrorCode ierr;
935   int             bs=a->bs,j,n,bs2=a->bs2,*ib;
936 
937   PetscFunctionBegin;
938   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
939   ierr = VecGetArray(xx,&xg);CHKERRQ(ierr); x = xg;
940   ierr = VecGetArray(zz,&zg);CHKERRQ(ierr); z = zg;
941 
942   idx   = a->j;
943   v     = a->a;
944   ii    = a->i;
945   xb    = x;
946   switch (bs) {
947   case 1:
948     for (i=0; i<mbs; i++) {
949       n  = ii[1] - ii[0]; ii++;
950       x1 = xb[0];
951       ib = idx + ai[i];
952       for (j=0; j<n; j++) {
953         rval    = ib[j];
954         z[rval] += *v * x1;
955         v++;
956       }
957       xb++;
958     }
959     break;
960   case 2:
961     for (i=0; i<mbs; i++) {
962       n  = ii[1] - ii[0]; ii++;
963       x1 = xb[0]; x2 = xb[1];
964       ib = idx + ai[i];
965       for (j=0; j<n; j++) {
966         rval      = ib[j]*2;
967         z[rval++] += v[0]*x1 + v[1]*x2;
968         z[rval]   += v[2]*x1 + v[3]*x2;
969         v  += 4;
970       }
971       xb += 2;
972     }
973     break;
974   case 3:
975     for (i=0; i<mbs; i++) {
976       n  = ii[1] - ii[0]; ii++;
977       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
978       ib = idx + ai[i];
979       for (j=0; j<n; j++) {
980         rval      = ib[j]*3;
981         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
982         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
983         z[rval]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
984         v  += 9;
985       }
986       xb += 3;
987     }
988     break;
989   case 4:
990     for (i=0; i<mbs; i++) {
991       n  = ii[1] - ii[0]; ii++;
992       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
993       ib = idx + ai[i];
994       for (j=0; j<n; j++) {
995         rval      = ib[j]*4;
996         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
997         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
998         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
999         z[rval]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1000         v  += 16;
1001       }
1002       xb += 4;
1003     }
1004     break;
1005   case 5:
1006     for (i=0; i<mbs; i++) {
1007       n  = ii[1] - ii[0]; ii++;
1008       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1009       x4 = xb[3]; x5 = xb[4];
1010       ib = idx + ai[i];
1011       for (j=0; j<n; j++) {
1012         rval      = ib[j]*5;
1013         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1014         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1015         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1016         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1017         z[rval]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1018         v  += 25;
1019       }
1020       xb += 5;
1021     }
1022     break;
1023   case 6:
1024     for (i=0; i<mbs; i++) {
1025       n  = ii[1] - ii[0]; ii++;
1026       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1027       x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
1028       ib = idx + ai[i];
1029       for (j=0; j<n; j++) {
1030         rval      = ib[j]*6;
1031         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 + v[4]*x5 + v[5]*x6;
1032         z[rval++] +=  v[6]*x1 +  v[7]*x2 +  v[8]*x3 +  v[9]*x4 + v[10]*x5 + v[11]*x6;
1033         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6;
1034         z[rval++] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6;
1035         z[rval++] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6;
1036         z[rval]   += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6;
1037         v  += 36;
1038       }
1039       xb += 6;
1040     }
1041     break;
1042   case 7:
1043     for (i=0; i<mbs; i++) {
1044       n  = ii[1] - ii[0]; ii++;
1045       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1046       x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1047       ib = idx + ai[i];
1048       for (j=0; j<n; j++) {
1049         rval      = ib[j]*7;
1050         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 + v[4]*x5 + v[5]*x6 + v[6]*x7;
1051         z[rval++] +=  v[7]*x1 +  v[8]*x2 +  v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7;
1052         z[rval++] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7;
1053         z[rval++] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7;
1054         z[rval++] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7;
1055         z[rval++] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7;
1056         z[rval]   += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7;
1057         v  += 49;
1058       }
1059       xb += 7;
1060     }
1061     break;
1062   default: {       /* block sizes larger then 7 by 7 are handled by BLAS */
1063       int          ncols,k;
1064       PetscScalar  *work,*workt;
1065 
1066       if (!a->mult_work) {
1067         k = PetscMax(A->m,A->n);
1068         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1069       }
1070       work = a->mult_work;
1071       for (i=0; i<mbs; i++) {
1072         n     = ii[1] - ii[0]; ii++;
1073         ncols = n*bs;
1074         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1075         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,work);
1076         /* LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); */
1077         v += n*bs2;
1078         x += bs;
1079         workt = work;
1080         for (j=0; j<n; j++) {
1081           zb = z + bs*(*idx++);
1082           for (k=0; k<bs; k++) zb[k] += workt[k] ;
1083           workt += bs;
1084         }
1085       }
1086     }
1087   }
1088   ierr = VecRestoreArray(xx,&xg);CHKERRQ(ierr);
1089   ierr = VecRestoreArray(zz,&zg);CHKERRQ(ierr);
1090   PetscLogFlops(2*a->nz*a->bs2 - A->n);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 #undef __FUNCT__
1095 #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ"
1096 PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1097 
1098 {
1099   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1100   PetscScalar     *xg,*zg,*zb,*x,*z,*xb,x1,x2,x3,x4,x5;
1101   MatScalar       *v;
1102   PetscErrorCode ierr;
1103   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,bs=a->bs,j,n,bs2=a->bs2,*ib;
1104 
1105   PetscFunctionBegin;
1106   if (yy != zz) { ierr = VecCopy(yy,zz);CHKERRQ(ierr); }
1107   ierr = VecGetArray(xx,&xg);CHKERRQ(ierr); x = xg;
1108   ierr = VecGetArray(zz,&zg);CHKERRQ(ierr); z = zg;
1109 
1110 
1111   idx   = a->j;
1112   v     = a->a;
1113   ii    = a->i;
1114   xb    = x;
1115 
1116   switch (bs) {
1117   case 1:
1118     for (i=0; i<mbs; i++) {
1119       n  = ii[1] - ii[0]; ii++;
1120       x1 = xb[0];
1121       ib = idx + ai[i];
1122       for (j=0; j<n; j++) {
1123         rval    = ib[j];
1124         z[rval] += *v * x1;
1125         v++;
1126       }
1127       xb++;
1128     }
1129     break;
1130   case 2:
1131     for (i=0; i<mbs; i++) {
1132       n  = ii[1] - ii[0]; ii++;
1133       x1 = xb[0]; x2 = xb[1];
1134       ib = idx + ai[i];
1135       for (j=0; j<n; j++) {
1136         rval      = ib[j]*2;
1137         z[rval++] += v[0]*x1 + v[1]*x2;
1138         z[rval++] += v[2]*x1 + v[3]*x2;
1139         v  += 4;
1140       }
1141       xb += 2;
1142     }
1143     break;
1144   case 3:
1145     for (i=0; i<mbs; i++) {
1146       n  = ii[1] - ii[0]; ii++;
1147       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1148       ib = idx + ai[i];
1149       for (j=0; j<n; j++) {
1150         rval      = ib[j]*3;
1151         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1152         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1153         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1154         v  += 9;
1155       }
1156       xb += 3;
1157     }
1158     break;
1159   case 4:
1160     for (i=0; i<mbs; i++) {
1161       n  = ii[1] - ii[0]; ii++;
1162       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1163       ib = idx + ai[i];
1164       for (j=0; j<n; j++) {
1165         rval      = ib[j]*4;
1166         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1167         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1168         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1169         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1170         v  += 16;
1171       }
1172       xb += 4;
1173     }
1174     break;
1175   case 5:
1176     for (i=0; i<mbs; i++) {
1177       n  = ii[1] - ii[0]; ii++;
1178       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1179       x4 = xb[3]; x5 = xb[4];
1180       ib = idx + ai[i];
1181       for (j=0; j<n; j++) {
1182         rval      = ib[j]*5;
1183         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1184         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1185         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1186         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1187         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1188         v  += 25;
1189       }
1190       xb += 5;
1191     }
1192     break;
1193   default: {      /* block sizes larger then 5 by 5 are handled by BLAS */
1194       int          ncols,k;
1195       PetscScalar  *work,*workt;
1196 
1197       if (!a->mult_work) {
1198         k = PetscMax(A->m,A->n);
1199         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1200       }
1201       work = a->mult_work;
1202       for (i=0; i<mbs; i++) {
1203         n     = ii[1] - ii[0]; ii++;
1204         ncols = n*bs;
1205         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1206         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,work);
1207         /* LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); */
1208         v += n*bs2;
1209         x += bs;
1210         workt = work;
1211         for (j=0; j<n; j++) {
1212           zb = z + bs*(*idx++);
1213           for (k=0; k<bs; k++) zb[k] += workt[k] ;
1214           workt += bs;
1215         }
1216       }
1217     }
1218   }
1219   ierr = VecRestoreArray(xx,&xg);CHKERRQ(ierr);
1220   ierr = VecRestoreArray(zz,&zg);CHKERRQ(ierr);
1221   PetscLogFlops(2*a->nz*a->bs2);
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 #undef __FUNCT__
1226 #define __FUNCT__ "MatScale_SeqBAIJ"
1227 PetscErrorCode MatScale_SeqBAIJ(const PetscScalar *alpha,Mat inA)
1228 {
1229   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1230   int         totalnz = a->bs2*a->nz;
1231 #if defined(PETSC_USE_MAT_SINGLE)
1232   int         i;
1233 #else
1234   int         one = 1;
1235 #endif
1236 
1237   PetscFunctionBegin;
1238 #if defined(PETSC_USE_MAT_SINGLE)
1239   for (i=0; i<totalnz; i++) a->a[i] *= *alpha;
1240 #else
1241   BLscal_(&totalnz,(PetscScalar*)alpha,a->a,&one);
1242 #endif
1243   PetscLogFlops(totalnz);
1244   PetscFunctionReturn(0);
1245 }
1246 
1247 #undef __FUNCT__
1248 #define __FUNCT__ "MatNorm_SeqBAIJ"
1249 PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm)
1250 {
1251   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1252   MatScalar   *v = a->a;
1253   PetscReal   sum = 0.0;
1254   int         i,j,k,bs = a->bs,nz=a->nz,bs2=a->bs2,k1;
1255 
1256   PetscFunctionBegin;
1257   if (type == NORM_FROBENIUS) {
1258     for (i=0; i< bs2*nz; i++) {
1259 #if defined(PETSC_USE_COMPLEX)
1260       sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1261 #else
1262       sum += (*v)*(*v); v++;
1263 #endif
1264     }
1265     *norm = sqrt(sum);
1266   }  else if (type == NORM_INFINITY) { /* maximum row sum */
1267     *norm = 0.0;
1268     for (k=0; k<bs; k++) {
1269       for (j=0; j<a->mbs; j++) {
1270         v = a->a + bs2*a->i[j] + k;
1271         sum = 0.0;
1272         for (i=0; i<a->i[j+1]-a->i[j]; i++) {
1273           for (k1=0; k1<bs; k1++){
1274             sum += PetscAbsScalar(*v);
1275             v   += bs;
1276           }
1277         }
1278         if (sum > *norm) *norm = sum;
1279       }
1280     }
1281   } else {
1282     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1283   }
1284   PetscFunctionReturn(0);
1285 }
1286 
1287 
1288 #undef __FUNCT__
1289 #define __FUNCT__ "MatEqual_SeqBAIJ"
1290 PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg)
1291 {
1292   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data;
1293   PetscErrorCode ierr;
1294 
1295   PetscFunctionBegin;
1296   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1297   if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->nz != b->nz)) {
1298     *flg = PETSC_FALSE;
1299     PetscFunctionReturn(0);
1300   }
1301 
1302   /* if the a->i are the same */
1303   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr);
1304   if (*flg == PETSC_FALSE) {
1305     PetscFunctionReturn(0);
1306   }
1307 
1308   /* if a->j are the same */
1309   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int),flg);CHKERRQ(ierr);
1310   if (*flg == PETSC_FALSE) {
1311     PetscFunctionReturn(0);
1312   }
1313   /* if a->a are the same */
1314   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1315   PetscFunctionReturn(0);
1316 
1317 }
1318 
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatGetDiagonal_SeqBAIJ"
1321 PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1322 {
1323   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1324   PetscErrorCode ierr;
1325   int  i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1326   PetscScalar  *x,zero = 0.0;
1327   MatScalar    *aa,*aa_j;
1328 
1329   PetscFunctionBegin;
1330   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1331   bs   = a->bs;
1332   aa   = a->a;
1333   ai   = a->i;
1334   aj   = a->j;
1335   ambs = a->mbs;
1336   bs2  = a->bs2;
1337 
1338   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1339   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1340   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1341   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1342   for (i=0; i<ambs; i++) {
1343     for (j=ai[i]; j<ai[i+1]; j++) {
1344       if (aj[j] == i) {
1345         row  = i*bs;
1346         aa_j = aa+j*bs2;
1347         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1348         break;
1349       }
1350     }
1351   }
1352   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatDiagonalScale_SeqBAIJ"
1358 PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1359 {
1360   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1361   PetscScalar  *l,*r,x,*li,*ri;
1362   MatScalar    *aa,*v;
1363   PetscErrorCode ierr;
1364   int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1365 
1366   PetscFunctionBegin;
1367   ai  = a->i;
1368   aj  = a->j;
1369   aa  = a->a;
1370   m   = A->m;
1371   n   = A->n;
1372   bs  = a->bs;
1373   mbs = a->mbs;
1374   bs2 = a->bs2;
1375   if (ll) {
1376     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
1377     ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1378     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1379     for (i=0; i<mbs; i++) { /* for each block row */
1380       M  = ai[i+1] - ai[i];
1381       li = l + i*bs;
1382       v  = aa + bs2*ai[i];
1383       for (j=0; j<M; j++) { /* for each block */
1384         for (k=0; k<bs2; k++) {
1385           (*v++) *= li[k%bs];
1386         }
1387       }
1388     }
1389     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1390     PetscLogFlops(a->nz);
1391   }
1392 
1393   if (rr) {
1394     ierr = VecGetArray(rr,&r);CHKERRQ(ierr);
1395     ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr);
1396     if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
1397     for (i=0; i<mbs; i++) { /* for each block row */
1398       M  = ai[i+1] - ai[i];
1399       v  = aa + bs2*ai[i];
1400       for (j=0; j<M; j++) { /* for each block */
1401         ri = r + bs*aj[ai[i]+j];
1402         for (k=0; k<bs; k++) {
1403           x = ri[k];
1404           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
1405         }
1406       }
1407     }
1408     ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr);
1409     PetscLogFlops(a->nz);
1410   }
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatGetInfo_SeqBAIJ"
1417 PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1418 {
1419   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1420 
1421   PetscFunctionBegin;
1422   info->rows_global    = (double)A->m;
1423   info->columns_global = (double)A->n;
1424   info->rows_local     = (double)A->m;
1425   info->columns_local  = (double)A->n;
1426   info->block_size     = a->bs2;
1427   info->nz_allocated   = a->maxnz;
1428   info->nz_used        = a->bs2*a->nz;
1429   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1430   info->assemblies   = A->num_ass;
1431   info->mallocs      = a->reallocs;
1432   info->memory       = A->mem;
1433   if (A->factor) {
1434     info->fill_ratio_given  = A->info.fill_ratio_given;
1435     info->fill_ratio_needed = A->info.fill_ratio_needed;
1436     info->factor_mallocs    = A->info.factor_mallocs;
1437   } else {
1438     info->fill_ratio_given  = 0;
1439     info->fill_ratio_needed = 0;
1440     info->factor_mallocs    = 0;
1441   }
1442   PetscFunctionReturn(0);
1443 }
1444 
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatZeroEntries_SeqBAIJ"
1448 PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A)
1449 {
1450   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1451   PetscErrorCode ierr;
1452 
1453   PetscFunctionBegin;
1454   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
1455   PetscFunctionReturn(0);
1456 }
1457