xref: /petsc/src/mat/impls/baij/seq/baij2.c (revision 2f2cbf6d06d0509fc30bbb0fccbdb1461a4f72a9)
1 /*$Id: baij2.c,v 1.75 2001/09/07 20:09:49 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/seq/baij.h"
4 #include "src/inline/spops.h"
5 #include "src/inline/ilu.h"
6 #include "petscbt.h"
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ"
10 int MatIncreaseOverlap_SeqBAIJ(Mat A,int is_max,IS is[],int ov)
11 {
12   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
13   int         row,i,j,k,l,m,n,*idx,ierr,*nidx,isz,val,ival;
14   int         start,end,*ai,*aj,bs,*nidx2;
15   PetscBT     table;
16 
17   PetscFunctionBegin;
18   m     = a->mbs;
19   ai    = a->i;
20   aj    = a->j;
21   bs    = a->bs;
22 
23   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
24 
25   ierr = PetscBTCreate(m,table);CHKERRQ(ierr);
26   ierr = PetscMalloc((m+1)*sizeof(int),&nidx);CHKERRQ(ierr);
27   ierr = PetscMalloc((A->m+1)*sizeof(int),&nidx2);CHKERRQ(ierr);
28 
29   for (i=0; i<is_max; i++) {
30     /* Initialise the two local arrays */
31     isz  = 0;
32     ierr = PetscBTMemzero(m,table);CHKERRQ(ierr);
33 
34     /* Extract the indices, assume there can be duplicate entries */
35     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
36     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
37 
38     /* Enter these into the temp arrays i.e mark table[row], enter row into new index */
39     for (j=0; j<n ; ++j){
40       ival = idx[j]/bs; /* convert the indices into block indices */
41       if (ival>m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
42       if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;}
43     }
44     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
45     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
46 
47     k = 0;
48     for (j=0; j<ov; j++){ /* for each overlap*/
49       n = isz;
50       for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */
51         row   = nidx[k];
52         start = ai[row];
53         end   = ai[row+1];
54         for (l = start; l<end ; l++){
55           val = aj[l];
56           if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;}
57         }
58       }
59     }
60     /* expand the Index Set */
61     for (j=0; j<isz; j++) {
62       for (k=0; k<bs; k++)
63         nidx2[j*bs+k] = nidx[j]*bs+k;
64     }
65     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
66   }
67   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
68   ierr = PetscFree(nidx);CHKERRQ(ierr);
69   ierr = PetscFree(nidx2);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 
73 #undef __FUNCT__
74 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private"
75 int MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
76 {
77   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data,*c;
78   int          *smap,i,k,kstart,kend,ierr,oldcols = a->nbs,*lens;
79   int          row,mat_i,*mat_j,tcol,*mat_ilen;
80   int          *irow,*icol,nrows,ncols,*ssmap,bs=a->bs,bs2=a->bs2;
81   int          *aj = a->j,*ai = a->i;
82   MatScalar    *mat_a;
83   Mat          C;
84   PetscTruth   flag;
85 
86   PetscFunctionBegin;
87   ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr);
88   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
89 
90   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
91   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
92   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
93   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
94 
95   ierr = PetscMalloc((1+oldcols)*sizeof(int),&smap);CHKERRQ(ierr);
96   ssmap = smap;
97   ierr = PetscMalloc((1+nrows)*sizeof(int),&lens);CHKERRQ(ierr);
98   ierr  = PetscMemzero(smap,oldcols*sizeof(int));CHKERRQ(ierr);
99   for (i=0; i<ncols; i++) smap[icol[i]] = i+1;
100   /* determine lens of each row */
101   for (i=0; i<nrows; i++) {
102     kstart  = ai[irow[i]];
103     kend    = kstart + a->ilen[irow[i]];
104     lens[i] = 0;
105       for (k=kstart; k<kend; k++) {
106         if (ssmap[aj[k]]) {
107           lens[i]++;
108         }
109       }
110     }
111   /* Create and fill new matrix */
112   if (scall == MAT_REUSE_MATRIX) {
113     c = (Mat_SeqBAIJ *)((*B)->data);
114 
115     if (c->mbs!=nrows || c->nbs!=ncols || c->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
116     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(int),&flag);CHKERRQ(ierr);
117     if (flag == PETSC_FALSE) {
118       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
119     }
120     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(int));CHKERRQ(ierr);
121     C = *B;
122   } else {
123     ierr = MatCreateSeqBAIJ(A->comm,bs,nrows*bs,ncols*bs,0,lens,&C);CHKERRQ(ierr);
124   }
125   c = (Mat_SeqBAIJ *)(C->data);
126   for (i=0; i<nrows; i++) {
127     row    = irow[i];
128     kstart = ai[row];
129     kend   = kstart + a->ilen[row];
130     mat_i  = c->i[i];
131     mat_j  = c->j + mat_i;
132     mat_a  = c->a + mat_i*bs2;
133     mat_ilen = c->ilen + i;
134     for (k=kstart; k<kend; k++) {
135       if ((tcol=ssmap[a->j[k]])) {
136         *mat_j++ = tcol - 1;
137         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
138         mat_a   += bs2;
139         (*mat_ilen)++;
140       }
141     }
142   }
143 
144   /* Free work space */
145   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
146   ierr = PetscFree(smap);CHKERRQ(ierr);
147   ierr = PetscFree(lens);CHKERRQ(ierr);
148   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
149   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
150 
151   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
152   *B = C;
153   PetscFunctionReturn(0);
154 }
155 
156 #undef __FUNCT__
157 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ"
158 int MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
159 {
160   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
161   IS          is1,is2;
162   int         *vary,*iary,*irow,*icol,nrows,ncols,i,ierr,bs=a->bs,count;
163 
164   PetscFunctionBegin;
165   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
166   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
167   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
168   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
169 
170   /* Verify if the indices corespond to each element in a block
171    and form the IS with compressed IS */
172   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(int),&vary);CHKERRQ(ierr);
173   iary = vary + a->mbs;
174   ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr);
175   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
176   count = 0;
177   for (i=0; i<a->mbs; i++) {
178     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Index set does not match blocks");
179     if (vary[i]==bs) iary[count++] = i;
180   }
181   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
182 
183   ierr = PetscMemzero(vary,(a->mbs)*sizeof(int));CHKERRQ(ierr);
184   for (i=0; i<ncols; i++) vary[icol[i]/bs]++;
185   count = 0;
186   for (i=0; i<a->mbs; i++) {
187     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(1,"Internal error in PETSc");
188     if (vary[i]==bs) iary[count++] = i;
189   }
190   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr);
191   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
192   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
193   ierr = PetscFree(vary);CHKERRQ(ierr);
194 
195   ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,cs,scall,B);CHKERRQ(ierr);
196   ISDestroy(is1);
197   ISDestroy(is2);
198   PetscFunctionReturn(0);
199 }
200 
201 #undef __FUNCT__
202 #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ"
203 int MatGetSubMatrices_SeqBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
204 {
205   int ierr,i;
206 
207   PetscFunctionBegin;
208   if (scall == MAT_INITIAL_MATRIX) {
209     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
210   }
211 
212   for (i=0; i<n; i++) {
213     ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 
219 /* -------------------------------------------------------*/
220 /* Should check that shapes of vectors and matrices match */
221 /* -------------------------------------------------------*/
222 #include "petscblaslapack.h"
223 
224 #undef __FUNCT__
225 #define __FUNCT__ "MatMult_SeqBAIJ_1"
226 int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
227 {
228   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
229   PetscScalar     *x,*z,sum;
230   MatScalar       *v;
231   int             mbs=a->mbs,i,*idx,*ii,n,ierr;
232 
233   PetscFunctionBegin;
234   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
235   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
236 
237   idx   = a->j;
238   v     = a->a;
239   ii    = a->i;
240 
241   for (i=0; i<mbs; i++) {
242     n    = ii[1] - ii[0]; ii++;
243     sum  = 0.0;
244     while (n--) sum += *v++ * x[*idx++];
245     z[i] = sum;
246   }
247   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
248   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
249   PetscLogFlops(2*a->nz - A->m);
250   PetscFunctionReturn(0);
251 }
252 
253 #undef __FUNCT__
254 #define __FUNCT__ "MatMult_SeqBAIJ_2"
255 int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
256 {
257   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
258   PetscScalar     *x,*z,*xb,sum1,sum2;
259   PetscScalar     x1,x2;
260   MatScalar       *v;
261   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
262 
263   PetscFunctionBegin;
264   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
265   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
266 
267   idx   = a->j;
268   v     = a->a;
269   ii    = a->i;
270 
271   for (i=0; i<mbs; i++) {
272     n  = ii[1] - ii[0]; ii++;
273     sum1 = 0.0; sum2 = 0.0;
274     for (j=0; j<n; j++) {
275       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
276       sum1 += v[0]*x1 + v[2]*x2;
277       sum2 += v[1]*x1 + v[3]*x2;
278       v += 4;
279     }
280     z[0] = sum1; z[1] = sum2;
281     z += 2;
282   }
283   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
284   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
285   PetscLogFlops(8*a->nz - A->m);
286   PetscFunctionReturn(0);
287 }
288 
289 #undef __FUNCT__
290 #define __FUNCT__ "MatMult_SeqBAIJ_3"
291 int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
292 {
293   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
294   PetscScalar  *x,*z,*xb,sum1,sum2,sum3,x1,x2,x3;
295   MatScalar    *v;
296   int          ierr,mbs=a->mbs,i,*idx,*ii,j,n;
297 
298 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
299 #pragma disjoint(*v,*z,*xb)
300 #endif
301 
302   PetscFunctionBegin;
303   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
304   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
305 
306   idx   = a->j;
307   v     = a->a;
308   ii    = a->i;
309 
310   for (i=0; i<mbs; i++) {
311     n  = ii[1] - ii[0]; ii++;
312     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
313     for (j=0; j<n; j++) {
314       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
315       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
316       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
317       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
318       v += 9;
319     }
320     z[0] = sum1; z[1] = sum2; z[2] = sum3;
321     z += 3;
322   }
323   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
324   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
325   PetscLogFlops(18*a->nz - A->m);
326   PetscFunctionReturn(0);
327 }
328 
329 #undef __FUNCT__
330 #define __FUNCT__ "MatMult_SeqBAIJ_4"
331 int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
332 {
333   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
334   PetscScalar     *x,*z,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4;
335   MatScalar       *v;
336   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
337 
338   PetscFunctionBegin;
339   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
340   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
341 
342   idx   = a->j;
343   v     = a->a;
344   ii    = a->i;
345 
346   for (i=0; i<mbs; i++) {
347     n  = ii[1] - ii[0]; ii++;
348     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
349     for (j=0; j<n; j++) {
350       xb = x + 4*(*idx++);
351       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
352       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
353       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
354       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
355       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
356       v += 16;
357     }
358     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
359     z += 4;
360   }
361   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
362   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
363   PetscLogFlops(32*a->nz - A->m);
364   PetscFunctionReturn(0);
365 }
366 
367 #undef __FUNCT__
368 #define __FUNCT__ "MatMult_SeqBAIJ_5"
369 int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
370 {
371   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
372   PetscScalar     sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*xb,*z,*x;
373   MatScalar       *v;
374   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
375 
376   PetscFunctionBegin;
377   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
378   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
379 
380   idx   = a->j;
381   v     = a->a;
382   ii    = a->i;
383 
384   for (i=0; i<mbs; i++) {
385     n  = ii[1] - ii[0]; ii++;
386     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
387     for (j=0; j<n; j++) {
388       xb = x + 5*(*idx++);
389       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
390       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
391       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
392       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
393       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
394       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
395       v += 25;
396     }
397     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
398     z += 5;
399   }
400   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
401   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
402   PetscLogFlops(50*a->nz - A->m);
403   PetscFunctionReturn(0);
404 }
405 
406 
407 #undef __FUNCT__
408 #define __FUNCT__ "MatMult_SeqBAIJ_6"
409 int MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz)
410 {
411   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
412   PetscScalar     *x,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
413   PetscScalar     x1,x2,x3,x4,x5,x6;
414   MatScalar       *v;
415   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
416 
417   PetscFunctionBegin;
418   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
419   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
420 
421   idx   = a->j;
422   v     = a->a;
423   ii    = a->i;
424 
425   for (i=0; i<mbs; i++) {
426     n  = ii[1] - ii[0]; ii++;
427     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0;
428     for (j=0; j<n; j++) {
429       xb = x + 6*(*idx++);
430       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
431       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
432       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
433       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
434       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
435       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
436       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
437       v += 36;
438     }
439     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
440     z += 6;
441   }
442 
443   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
444   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
445   PetscLogFlops(72*a->nz - A->m);
446   PetscFunctionReturn(0);
447 }
448 #undef __FUNCT__
449 #define __FUNCT__ "MatMult_SeqBAIJ_7"
450 int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
451 {
452   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
453   PetscScalar     *x,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
454   PetscScalar     x1,x2,x3,x4,x5,x6,x7;
455   MatScalar       *v;
456   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
457 
458   PetscFunctionBegin;
459   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
460   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
461 
462   idx   = a->j;
463   v     = a->a;
464   ii    = a->i;
465 
466   for (i=0; i<mbs; i++) {
467     n  = ii[1] - ii[0]; ii++;
468     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
469     for (j=0; j<n; j++) {
470       xb = x + 7*(*idx++);
471       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
472       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
473       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
474       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
475       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
476       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
477       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
478       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
479       v += 49;
480     }
481     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
482     z += 7;
483   }
484 
485   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
486   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
487   PetscLogFlops(98*a->nz - A->m);
488   PetscFunctionReturn(0);
489 }
490 
491 /*
492     This will not work with MatScalar == float because it calls the BLAS
493 */
494 #undef __FUNCT__
495 #define __FUNCT__ "MatMult_SeqBAIJ_N"
496 int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
497 {
498   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
499   PetscScalar     *x,*z,*xb,*work,*workt;
500   MatScalar       *v;
501   int             ierr,mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2;
502   int             ncols,k;
503 
504   PetscFunctionBegin;
505   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
506   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
507 
508   idx   = a->j;
509   v     = a->a;
510   ii    = a->i;
511 
512 
513   if (!a->mult_work) {
514     k    = PetscMax(A->m,A->n);
515     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
516   }
517   work = a->mult_work;
518   for (i=0; i<mbs; i++) {
519     n     = ii[1] - ii[0]; ii++;
520     ncols = n*bs;
521     workt = work;
522     for (j=0; j<n; j++) {
523       xb = x + bs*(*idx++);
524       for (k=0; k<bs; k++) workt[k] = xb[k];
525       workt += bs;
526     }
527     Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z);
528     /* LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */
529     v += n*bs2;
530     z += bs;
531   }
532   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
533   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
534   PetscLogFlops(2*a->nz*bs2 - A->m);
535   PetscFunctionReturn(0);
536 }
537 
538 #undef __FUNCT__
539 #define __FUNCT__ "MatMultAdd_SeqBAIJ_1"
540 int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
541 {
542   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
543   PetscScalar     *x,*y,*z,sum;
544   MatScalar       *v;
545   int             ierr,mbs=a->mbs,i,*idx,*ii,n;
546 
547   PetscFunctionBegin;
548   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
549   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
550   if (zz != yy) {
551     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
552   } else {
553     z = y;
554   }
555 
556   idx   = a->j;
557   v     = a->a;
558   ii    = a->i;
559 
560   for (i=0; i<mbs; i++) {
561     n    = ii[1] - ii[0]; ii++;
562     sum  = y[i];
563     while (n--) sum += *v++ * x[*idx++];
564     z[i] = sum;
565   }
566   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
567   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
568   if (zz != yy) {
569     ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
570   }
571   PetscLogFlops(2*a->nz);
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "MatMultAdd_SeqBAIJ_2"
577 int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
578 {
579   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
580   PetscScalar     *x,*y,*z,*xb,sum1,sum2;
581   PetscScalar     x1,x2;
582   MatScalar       *v;
583   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
584 
585   PetscFunctionBegin;
586   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
587   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
588   if (zz != yy) {
589     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
590   } else {
591     z = y;
592   }
593 
594   idx   = a->j;
595   v     = a->a;
596   ii    = a->i;
597 
598   for (i=0; i<mbs; i++) {
599     n  = ii[1] - ii[0]; ii++;
600     sum1 = y[0]; sum2 = y[1];
601     for (j=0; j<n; j++) {
602       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
603       sum1 += v[0]*x1 + v[2]*x2;
604       sum2 += v[1]*x1 + v[3]*x2;
605       v += 4;
606     }
607     z[0] = sum1; z[1] = sum2;
608     z += 2; y += 2;
609   }
610   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
611   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
612   if (zz != yy) {
613     ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
614   }
615   PetscLogFlops(4*a->nz);
616   PetscFunctionReturn(0);
617 }
618 
619 #undef __FUNCT__
620 #define __FUNCT__ "MatMultAdd_SeqBAIJ_3"
621 int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
622 {
623   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
624   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,x1,x2,x3;
625   MatScalar       *v;
626   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
627 
628   PetscFunctionBegin;
629   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
630   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
631   if (zz != yy) {
632     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
633   } else {
634     z = y;
635   }
636 
637   idx   = a->j;
638   v     = a->a;
639   ii    = a->i;
640 
641   for (i=0; i<mbs; i++) {
642     n  = ii[1] - ii[0]; ii++;
643     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
644     for (j=0; j<n; j++) {
645       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
646       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
647       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
648       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
649       v += 9;
650     }
651     z[0] = sum1; z[1] = sum2; z[2] = sum3;
652     z += 3; y += 3;
653   }
654   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
655   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
656   if (zz != yy) {
657     ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
658   }
659   PetscLogFlops(18*a->nz);
660   PetscFunctionReturn(0);
661 }
662 
663 #undef __FUNCT__
664 #define __FUNCT__ "MatMultAdd_SeqBAIJ_4"
665 int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
666 {
667   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
668   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4;
669   MatScalar       *v;
670   int             ierr,mbs=a->mbs,i,*idx,*ii;
671   int             j,n;
672 
673   PetscFunctionBegin;
674   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
675   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
676   if (zz != yy) {
677     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
678   } else {
679     z = y;
680   }
681 
682   idx   = a->j;
683   v     = a->a;
684   ii    = a->i;
685 
686   for (i=0; i<mbs; i++) {
687     n  = ii[1] - ii[0]; ii++;
688     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
689     for (j=0; j<n; j++) {
690       xb = x + 4*(*idx++);
691       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
692       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
693       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
694       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
695       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
696       v += 16;
697     }
698     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
699     z += 4; y += 4;
700   }
701   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
702   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
703   if (zz != yy) {
704     ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
705   }
706   PetscLogFlops(32*a->nz);
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNCT__
711 #define __FUNCT__ "MatMultAdd_SeqBAIJ_5"
712 int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
713 {
714   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
715   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
716   MatScalar       *v;
717   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
718 
719   PetscFunctionBegin;
720   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
721   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
722   if (zz != yy) {
723     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
724   } else {
725     z = y;
726   }
727 
728   idx   = a->j;
729   v     = a->a;
730   ii    = a->i;
731 
732   for (i=0; i<mbs; i++) {
733     n  = ii[1] - ii[0]; ii++;
734     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
735     for (j=0; j<n; j++) {
736       xb = x + 5*(*idx++);
737       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
738       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
739       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
740       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
741       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
742       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
743       v += 25;
744     }
745     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
746     z += 5; y += 5;
747   }
748   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
749   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
750   if (zz != yy) {
751     ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
752   }
753   PetscLogFlops(50*a->nz);
754   PetscFunctionReturn(0);
755 }
756 #undef __FUNCT__
757 #define __FUNCT__ "MatMultAdd_SeqBAIJ_6"
758 int MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
759 {
760   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
761   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
762   PetscScalar     x1,x2,x3,x4,x5,x6;
763   MatScalar       *v;
764   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
765 
766   PetscFunctionBegin;
767   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
768   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
769   if (zz != yy) {
770     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
771   } else {
772     z = y;
773   }
774 
775   idx   = a->j;
776   v     = a->a;
777   ii    = a->i;
778 
779   for (i=0; i<mbs; i++) {
780     n  = ii[1] - ii[0]; ii++;
781     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5];
782     for (j=0; j<n; j++) {
783       xb = x + 6*(*idx++);
784       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
785       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
786       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
787       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
788       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
789       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
790       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
791       v += 36;
792     }
793     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
794     z += 6; y += 6;
795   }
796   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
797   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
798   if (zz != yy) {
799     ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
800   }
801   PetscLogFlops(72*a->nz);
802   PetscFunctionReturn(0);
803 }
804 
805 #undef __FUNCT__
806 #define __FUNCT__ "MatMultAdd_SeqBAIJ_7"
807 int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
808 {
809   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
810   PetscScalar     *x,*y,*z,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
811   PetscScalar     x1,x2,x3,x4,x5,x6,x7;
812   MatScalar       *v;
813   int             ierr,mbs=a->mbs,i,*idx,*ii,j,n;
814 
815   PetscFunctionBegin;
816   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
817   ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
818   if (zz != yy) {
819     ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
820   } else {
821     z = y;
822   }
823 
824   idx   = a->j;
825   v     = a->a;
826   ii    = a->i;
827 
828   for (i=0; i<mbs; i++) {
829     n  = ii[1] - ii[0]; ii++;
830     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
831     for (j=0; j<n; j++) {
832       xb = x + 7*(*idx++);
833       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
834       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
835       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
836       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
837       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
838       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
839       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
840       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
841       v += 49;
842     }
843     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
844     z += 7; y += 7;
845   }
846   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
847   ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
848   if (zz != yy) {
849     ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
850   }
851   PetscLogFlops(98*a->nz);
852   PetscFunctionReturn(0);
853 }
854 
855 #undef __FUNCT__
856 #define __FUNCT__ "MatMultAdd_SeqBAIJ_N"
857 int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
858 {
859   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
860   PetscScalar    *x,*z,*xb,*work,*workt,*y;
861   MatScalar      *v;
862   int            mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr;
863   int            ncols,k;
864 
865   PetscFunctionBegin;
866   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
867   ierr = VecGetArrayFast(zz,&z);CHKERRQ(ierr);
868   if (zz != yy) {
869     ierr = VecGetArrayFast(yy,&y);CHKERRQ(ierr);
870     ierr = PetscMemcpy(z,y,yy->n*sizeof(PetscScalar));CHKERRQ(ierr);
871     ierr = VecRestoreArrayFast(yy,&y);CHKERRQ(ierr);
872   }
873 
874   idx   = a->j;
875   v     = a->a;
876   ii    = a->i;
877 
878 
879   if (!a->mult_work) {
880     k    = PetscMax(A->m,A->n);
881     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
882   }
883   work = a->mult_work;
884   for (i=0; i<mbs; i++) {
885     n     = ii[1] - ii[0]; ii++;
886     ncols = n*bs;
887     workt = work;
888     for (j=0; j<n; j++) {
889       xb = x + bs*(*idx++);
890       for (k=0; k<bs; k++) workt[k] = xb[k];
891       workt += bs;
892     }
893     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
894     /* LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */
895     v += n*bs2;
896     z += bs;
897   }
898   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
899   ierr = VecRestoreArrayFast(zz,&z);CHKERRQ(ierr);
900   PetscLogFlops(2*a->nz*bs2);
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatMultTranspose_SeqBAIJ"
906 int MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz)
907 {
908   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
909   PetscScalar     *xg,*zg,*zb,zero = 0.0;
910   PetscScalar     *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
911   MatScalar       *v;
912   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval;
913   int             bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
914 
915   PetscFunctionBegin;
916   ierr = VecSet(&zero,zz);CHKERRQ(ierr);
917   ierr = VecGetArrayFast(xx,&xg);CHKERRQ(ierr); x = xg;
918   ierr = VecGetArrayFast(zz,&zg);CHKERRQ(ierr); z = zg;
919 
920   idx   = a->j;
921   v     = a->a;
922   ii    = a->i;
923   xb    = x;
924   switch (bs) {
925   case 1:
926     for (i=0; i<mbs; i++) {
927       n  = ii[1] - ii[0]; ii++;
928       x1 = xb[0];
929       ib = idx + ai[i];
930       for (j=0; j<n; j++) {
931         rval    = ib[j];
932         z[rval] += *v * x1;
933         v++;
934       }
935       xb++;
936     }
937     break;
938   case 2:
939     for (i=0; i<mbs; i++) {
940       n  = ii[1] - ii[0]; ii++;
941       x1 = xb[0]; x2 = xb[1];
942       ib = idx + ai[i];
943       for (j=0; j<n; j++) {
944         rval      = ib[j]*2;
945         z[rval++] += v[0]*x1 + v[1]*x2;
946         z[rval]   += v[2]*x1 + v[3]*x2;
947         v  += 4;
948       }
949       xb += 2;
950     }
951     break;
952   case 3:
953     for (i=0; i<mbs; i++) {
954       n  = ii[1] - ii[0]; ii++;
955       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
956       ib = idx + ai[i];
957       for (j=0; j<n; j++) {
958         rval      = ib[j]*3;
959         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
960         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
961         z[rval]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
962         v  += 9;
963       }
964       xb += 3;
965     }
966     break;
967   case 4:
968     for (i=0; i<mbs; i++) {
969       n  = ii[1] - ii[0]; ii++;
970       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
971       ib = idx + ai[i];
972       for (j=0; j<n; j++) {
973         rval      = ib[j]*4;
974         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
975         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
976         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
977         z[rval]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
978         v  += 16;
979       }
980       xb += 4;
981     }
982     break;
983   case 5:
984     for (i=0; i<mbs; i++) {
985       n  = ii[1] - ii[0]; ii++;
986       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
987       x4 = xb[3]; x5 = xb[4];
988       ib = idx + ai[i];
989       for (j=0; j<n; j++) {
990         rval      = ib[j]*5;
991         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
992         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
993         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
994         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
995         z[rval]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
996         v  += 25;
997       }
998       xb += 5;
999     }
1000     break;
1001   case 6:
1002     for (i=0; i<mbs; i++) {
1003       n  = ii[1] - ii[0]; ii++;
1004       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1005       x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
1006       ib = idx + ai[i];
1007       for (j=0; j<n; j++) {
1008         rval      = ib[j]*6;
1009         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 + v[4]*x5 + v[5]*x6;
1010         z[rval++] +=  v[6]*x1 +  v[7]*x2 +  v[8]*x3 +  v[9]*x4 + v[10]*x5 + v[11]*x6;
1011         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4 + v[16]*x5 + v[17]*x6;
1012         z[rval++] += v[18]*x1 + v[19]*x2 + v[20]*x3 + v[21]*x4 + v[22]*x5 + v[23]*x6;
1013         z[rval++] += v[24]*x1 + v[25]*x2 + v[26]*x3 + v[27]*x4 + v[28]*x5 + v[29]*x6;
1014         z[rval]   += v[30]*x1 + v[31]*x2 + v[32]*x3 + v[33]*x4 + v[34]*x5 + v[35]*x6;
1015         v  += 36;
1016       }
1017       xb += 6;
1018     }
1019     break;
1020   case 7:
1021     for (i=0; i<mbs; i++) {
1022       n  = ii[1] - ii[0]; ii++;
1023       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1024       x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1025       ib = idx + ai[i];
1026       for (j=0; j<n; j++) {
1027         rval      = ib[j]*7;
1028         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 + v[4]*x5 + v[5]*x6 + v[6]*x7;
1029         z[rval++] +=  v[7]*x1 +  v[8]*x2 +  v[9]*x3 + v[10]*x4 + v[11]*x5 + v[12]*x6 + v[13]*x7;
1030         z[rval++] += v[14]*x1 + v[15]*x2 + v[16]*x3 + v[17]*x4 + v[18]*x5 + v[19]*x6 + v[20]*x7;
1031         z[rval++] += v[21]*x1 + v[22]*x2 + v[23]*x3 + v[24]*x4 + v[25]*x5 + v[26]*x6 + v[27]*x7;
1032         z[rval++] += v[28]*x1 + v[29]*x2 + v[30]*x3 + v[31]*x4 + v[32]*x5 + v[33]*x6 + v[34]*x7;
1033         z[rval++] += v[35]*x1 + v[36]*x2 + v[37]*x3 + v[38]*x4 + v[39]*x5 + v[40]*x6 + v[41]*x7;
1034         z[rval]   += v[42]*x1 + v[43]*x2 + v[44]*x3 + v[45]*x4 + v[46]*x5 + v[47]*x6 + v[48]*x7;
1035         v  += 49;
1036       }
1037       xb += 7;
1038     }
1039     break;
1040   default: {       /* block sizes larger then 7 by 7 are handled by BLAS */
1041       int          ncols,k;
1042       PetscScalar  *work,*workt;
1043 
1044       if (!a->mult_work) {
1045         k = PetscMax(A->m,A->n);
1046         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1047       }
1048       work = a->mult_work;
1049       for (i=0; i<mbs; i++) {
1050         n     = ii[1] - ii[0]; ii++;
1051         ncols = n*bs;
1052         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1053         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,work);
1054         /* LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); */
1055         v += n*bs2;
1056         x += bs;
1057         workt = work;
1058         for (j=0; j<n; j++) {
1059           zb = z + bs*(*idx++);
1060           for (k=0; k<bs; k++) zb[k] += workt[k] ;
1061           workt += bs;
1062         }
1063       }
1064     }
1065   }
1066   ierr = VecRestoreArrayFast(xx,&xg);CHKERRQ(ierr);
1067   ierr = VecRestoreArrayFast(zz,&zg);CHKERRQ(ierr);
1068   PetscLogFlops(2*a->nz*a->bs2 - A->n);
1069   PetscFunctionReturn(0);
1070 }
1071 
1072 #undef __FUNCT__
1073 #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ"
1074 int MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1075 
1076 {
1077   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1078   PetscScalar     *xg,*zg,*zb,*x,*z,*xb,x1,x2,x3,x4,x5;
1079   MatScalar       *v;
1080   int             mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,bs=a->bs,j,n,bs2=a->bs2,*ib,ierr;
1081 
1082   PetscFunctionBegin;
1083   if (yy != zz) { ierr = VecCopy(yy,zz);CHKERRQ(ierr); }
1084   ierr = VecGetArrayFast(xx,&xg);CHKERRQ(ierr); x = xg;
1085   ierr = VecGetArrayFast(zz,&zg);CHKERRQ(ierr); z = zg;
1086 
1087 
1088   idx   = a->j;
1089   v     = a->a;
1090   ii    = a->i;
1091   xb    = x;
1092 
1093   switch (bs) {
1094   case 1:
1095     for (i=0; i<mbs; i++) {
1096       n  = ii[1] - ii[0]; ii++;
1097       x1 = xb[0];
1098       ib = idx + ai[i];
1099       for (j=0; j<n; j++) {
1100         rval    = ib[j];
1101         z[rval] += *v * x1;
1102         v++;
1103       }
1104       xb++;
1105     }
1106     break;
1107   case 2:
1108     for (i=0; i<mbs; i++) {
1109       n  = ii[1] - ii[0]; ii++;
1110       x1 = xb[0]; x2 = xb[1];
1111       ib = idx + ai[i];
1112       for (j=0; j<n; j++) {
1113         rval      = ib[j]*2;
1114         z[rval++] += v[0]*x1 + v[1]*x2;
1115         z[rval++] += v[2]*x1 + v[3]*x2;
1116         v  += 4;
1117       }
1118       xb += 2;
1119     }
1120     break;
1121   case 3:
1122     for (i=0; i<mbs; i++) {
1123       n  = ii[1] - ii[0]; ii++;
1124       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1125       ib = idx + ai[i];
1126       for (j=0; j<n; j++) {
1127         rval      = ib[j]*3;
1128         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1129         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1130         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1131         v  += 9;
1132       }
1133       xb += 3;
1134     }
1135     break;
1136   case 4:
1137     for (i=0; i<mbs; i++) {
1138       n  = ii[1] - ii[0]; ii++;
1139       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1140       ib = idx + ai[i];
1141       for (j=0; j<n; j++) {
1142         rval      = ib[j]*4;
1143         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1144         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1145         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1146         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1147         v  += 16;
1148       }
1149       xb += 4;
1150     }
1151     break;
1152   case 5:
1153     for (i=0; i<mbs; i++) {
1154       n  = ii[1] - ii[0]; ii++;
1155       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1156       x4 = xb[3]; x5 = xb[4];
1157       ib = idx + ai[i];
1158       for (j=0; j<n; j++) {
1159         rval      = ib[j]*5;
1160         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1161         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1162         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1163         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1164         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1165         v  += 25;
1166       }
1167       xb += 5;
1168     }
1169     break;
1170   default: {      /* block sizes larger then 5 by 5 are handled by BLAS */
1171       int          ncols,k;
1172       PetscScalar  *work,*workt;
1173 
1174       if (!a->mult_work) {
1175         k = PetscMax(A->m,A->n);
1176         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1177       }
1178       work = a->mult_work;
1179       for (i=0; i<mbs; i++) {
1180         n     = ii[1] - ii[0]; ii++;
1181         ncols = n*bs;
1182         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1183         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,work);
1184         /* LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); */
1185         v += n*bs2;
1186         x += bs;
1187         workt = work;
1188         for (j=0; j<n; j++) {
1189           zb = z + bs*(*idx++);
1190           for (k=0; k<bs; k++) zb[k] += workt[k] ;
1191           workt += bs;
1192         }
1193       }
1194     }
1195   }
1196   ierr = VecRestoreArrayFast(xx,&xg);CHKERRQ(ierr);
1197   ierr = VecRestoreArrayFast(zz,&zg);CHKERRQ(ierr);
1198   PetscLogFlops(2*a->nz*a->bs2);
1199   PetscFunctionReturn(0);
1200 }
1201 
1202 #undef __FUNCT__
1203 #define __FUNCT__ "MatScale_SeqBAIJ"
1204 int MatScale_SeqBAIJ(const PetscScalar *alpha,Mat inA)
1205 {
1206   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1207   int         totalnz = a->bs2*a->nz;
1208 #if defined(PETSC_USE_MAT_SINGLE)
1209   int         i;
1210 #else
1211   int         one = 1;
1212 #endif
1213 
1214   PetscFunctionBegin;
1215 #if defined(PETSC_USE_MAT_SINGLE)
1216   for (i=0; i<totalnz; i++) a->a[i] *= *alpha;
1217 #else
1218   BLscal_(&totalnz,(PetscScalar*)alpha,a->a,&one);
1219 #endif
1220   PetscLogFlops(totalnz);
1221   PetscFunctionReturn(0);
1222 }
1223 
1224 #undef __FUNCT__
1225 #define __FUNCT__ "MatNorm_SeqBAIJ"
1226 int MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm)
1227 {
1228   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1229   MatScalar   *v = a->a;
1230   PetscReal   sum = 0.0;
1231   int         i,j,k,bs = a->bs,nz=a->nz,bs2=a->bs2,k1;
1232 
1233   PetscFunctionBegin;
1234   if (type == NORM_FROBENIUS) {
1235     for (i=0; i< bs2*nz; i++) {
1236 #if defined(PETSC_USE_COMPLEX)
1237       sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1238 #else
1239       sum += (*v)*(*v); v++;
1240 #endif
1241     }
1242     *norm = sqrt(sum);
1243   }  else if (type == NORM_INFINITY) { /* maximum row sum */
1244     *norm = 0.0;
1245     for (k=0; k<bs; k++) {
1246       for (j=0; j<a->mbs; j++) {
1247         v = a->a + bs2*a->i[j] + k;
1248         sum = 0.0;
1249         for (i=0; i<a->i[j+1]-a->i[j]; i++) {
1250           for (k1=0; k1<bs; k1++){
1251             sum += PetscAbsScalar(*v);
1252             v   += bs;
1253           }
1254         }
1255         if (sum > *norm) *norm = sum;
1256       }
1257     }
1258   } else {
1259     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1260   }
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 
1265 #undef __FUNCT__
1266 #define __FUNCT__ "MatEqual_SeqBAIJ"
1267 int MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg)
1268 {
1269   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data;
1270   int         ierr;
1271 
1272   PetscFunctionBegin;
1273   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1274   if ((A->m != B->m) || (A->n != B->n) || (a->bs != b->bs)|| (a->nz != b->nz)) {
1275     *flg = PETSC_FALSE;
1276     PetscFunctionReturn(0);
1277   }
1278 
1279   /* if the a->i are the same */
1280   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(int),flg);CHKERRQ(ierr);
1281   if (*flg == PETSC_FALSE) {
1282     PetscFunctionReturn(0);
1283   }
1284 
1285   /* if a->j are the same */
1286   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int),flg);CHKERRQ(ierr);
1287   if (*flg == PETSC_FALSE) {
1288     PetscFunctionReturn(0);
1289   }
1290   /* if a->a are the same */
1291   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1292   PetscFunctionReturn(0);
1293 
1294 }
1295 
1296 #undef __FUNCT__
1297 #define __FUNCT__ "MatGetDiagonal_SeqBAIJ"
1298 int MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1299 {
1300   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1301   int          ierr,i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1302   PetscScalar  *x,zero = 0.0;
1303   MatScalar    *aa,*aa_j;
1304 
1305   PetscFunctionBegin;
1306   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1307   bs   = a->bs;
1308   aa   = a->a;
1309   ai   = a->i;
1310   aj   = a->j;
1311   ambs = a->mbs;
1312   bs2  = a->bs2;
1313 
1314   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1315   ierr = VecGetArrayFast(v,&x);CHKERRQ(ierr);
1316   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1317   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1318   for (i=0; i<ambs; i++) {
1319     for (j=ai[i]; j<ai[i+1]; j++) {
1320       if (aj[j] == i) {
1321         row  = i*bs;
1322         aa_j = aa+j*bs2;
1323         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1324         break;
1325       }
1326     }
1327   }
1328   ierr = VecRestoreArrayFast(v,&x);CHKERRQ(ierr);
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatDiagonalScale_SeqBAIJ"
1334 int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1335 {
1336   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1337   PetscScalar  *l,*r,x,*li,*ri;
1338   MatScalar    *aa,*v;
1339   int          ierr,i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1340 
1341   PetscFunctionBegin;
1342   ai  = a->i;
1343   aj  = a->j;
1344   aa  = a->a;
1345   m   = A->m;
1346   n   = A->n;
1347   bs  = a->bs;
1348   mbs = a->mbs;
1349   bs2 = a->bs2;
1350   if (ll) {
1351     ierr = VecGetArrayFast(ll,&l);CHKERRQ(ierr);
1352     ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1353     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1354     for (i=0; i<mbs; i++) { /* for each block row */
1355       M  = ai[i+1] - ai[i];
1356       li = l + i*bs;
1357       v  = aa + bs2*ai[i];
1358       for (j=0; j<M; j++) { /* for each block */
1359         for (k=0; k<bs2; k++) {
1360           (*v++) *= li[k%bs];
1361         }
1362       }
1363     }
1364     ierr = VecRestoreArrayFast(ll,&l);CHKERRQ(ierr);
1365     PetscLogFlops(a->nz);
1366   }
1367 
1368   if (rr) {
1369     ierr = VecGetArrayFast(rr,&r);CHKERRQ(ierr);
1370     ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr);
1371     if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
1372     for (i=0; i<mbs; i++) { /* for each block row */
1373       M  = ai[i+1] - ai[i];
1374       v  = aa + bs2*ai[i];
1375       for (j=0; j<M; j++) { /* for each block */
1376         ri = r + bs*aj[ai[i]+j];
1377         for (k=0; k<bs; k++) {
1378           x = ri[k];
1379           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
1380         }
1381       }
1382     }
1383     ierr = VecRestoreArrayFast(rr,&r);CHKERRQ(ierr);
1384     PetscLogFlops(a->nz);
1385   }
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 
1390 #undef __FUNCT__
1391 #define __FUNCT__ "MatGetInfo_SeqBAIJ"
1392 int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1393 {
1394   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1395 
1396   PetscFunctionBegin;
1397   info->rows_global    = (double)A->m;
1398   info->columns_global = (double)A->n;
1399   info->rows_local     = (double)A->m;
1400   info->columns_local  = (double)A->n;
1401   info->block_size     = a->bs2;
1402   info->nz_allocated   = a->maxnz;
1403   info->nz_used        = a->bs2*a->nz;
1404   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1405   info->assemblies   = A->num_ass;
1406   info->mallocs      = a->reallocs;
1407   info->memory       = A->mem;
1408   if (A->factor) {
1409     info->fill_ratio_given  = A->info.fill_ratio_given;
1410     info->fill_ratio_needed = A->info.fill_ratio_needed;
1411     info->factor_mallocs    = A->info.factor_mallocs;
1412   } else {
1413     info->fill_ratio_given  = 0;
1414     info->fill_ratio_needed = 0;
1415     info->factor_mallocs    = 0;
1416   }
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatZeroEntries_SeqBAIJ"
1423 int MatZeroEntries_SeqBAIJ(Mat A)
1424 {
1425   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1426   int         ierr;
1427 
1428   PetscFunctionBegin;
1429   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
1430   PetscFunctionReturn(0);
1431 }
1432