xref: /petsc/src/mat/impls/baij/seq/baij2.c (revision 7a4fe282d1b349e95b3be72d69d8dd3d3bcd7bc6)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/seq/baij.h"
4 #include "../src/inline/spops.h"
5 #include "../src/inline/ilu.h"
6 #include "petscbt.h"
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ"
10 PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
11 {
12   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
13   PetscErrorCode ierr;
14   PetscInt       row,i,j,k,l,m,n,*nidx,isz,val,ival;
15   const PetscInt *idx;
16   PetscInt       start,end,*ai,*aj,bs,*nidx2;
17   PetscBT        table;
18 
19   PetscFunctionBegin;
20   m     = a->mbs;
21   ai    = a->i;
22   aj    = a->j;
23   bs    = A->rmap->bs;
24 
25   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
26 
27   ierr = PetscBTCreate(m,table);CHKERRQ(ierr);
28   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr);
29   ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr);
30 
31   for (i=0; i<is_max; i++) {
32     /* Initialise the two local arrays */
33     isz  = 0;
34     ierr = PetscBTMemzero(m,table);CHKERRQ(ierr);
35 
36     /* Extract the indices, assume there can be duplicate entries */
37     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
38     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
39 
40     /* Enter these into the temp arrays i.e mark table[row], enter row into new index */
41     for (j=0; j<n ; ++j){
42       ival = idx[j]/bs; /* convert the indices into block indices */
43       if (ival>=m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
44       if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;}
45     }
46     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
47     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
48 
49     k = 0;
50     for (j=0; j<ov; j++){ /* for each overlap*/
51       n = isz;
52       for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */
53         row   = nidx[k];
54         start = ai[row];
55         end   = ai[row+1];
56         for (l = start; l<end ; l++){
57           val = aj[l];
58           if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;}
59         }
60       }
61     }
62     /* expand the Index Set */
63     for (j=0; j<isz; j++) {
64       for (k=0; k<bs; k++)
65         nidx2[j*bs+k] = nidx[j]*bs+k;
66     }
67     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
68   }
69   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
70   ierr = PetscFree(nidx);CHKERRQ(ierr);
71   ierr = PetscFree(nidx2);CHKERRQ(ierr);
72   PetscFunctionReturn(0);
73 }
74 
75 #undef __FUNCT__
76 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private"
77 PetscErrorCode MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
78 {
79   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*c;
80   PetscErrorCode ierr;
81   PetscInt       *smap,i,k,kstart,kend,oldcols = a->nbs,*lens;
82   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
83   const PetscInt *irow,*icol;
84   PetscInt       nrows,ncols,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
85   PetscInt       *aj = a->j,*ai = a->i;
86   MatScalar      *mat_a;
87   Mat            C;
88   PetscTruth     flag,sorted;
89 
90   PetscFunctionBegin;
91   ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr);
92   if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
93 
94   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
95   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
96   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
97   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
98 
99   ierr = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr);
100   ssmap = smap;
101   ierr = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
102   ierr  = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr);
103   for (i=0; i<ncols; i++) smap[icol[i]] = i+1;
104   /* determine lens of each row */
105   for (i=0; i<nrows; i++) {
106     kstart  = ai[irow[i]];
107     kend    = kstart + a->ilen[irow[i]];
108     lens[i] = 0;
109       for (k=kstart; k<kend; k++) {
110         if (ssmap[aj[k]]) {
111           lens[i]++;
112         }
113       }
114     }
115   /* Create and fill new matrix */
116   if (scall == MAT_REUSE_MATRIX) {
117     c = (Mat_SeqBAIJ *)((*B)->data);
118 
119     if (c->mbs!=nrows || c->nbs!=ncols || (*B)->rmap->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
120     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr);
121     if (!flag) {
122       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
123     }
124     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr);
125     C = *B;
126   } else {
127     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
128     ierr = MatSetSizes(C,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
129     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
130     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,lens);CHKERRQ(ierr);
131   }
132   c = (Mat_SeqBAIJ *)(C->data);
133   for (i=0; i<nrows; i++) {
134     row    = irow[i];
135     kstart = ai[row];
136     kend   = kstart + a->ilen[row];
137     mat_i  = c->i[i];
138     mat_j  = c->j + mat_i;
139     mat_a  = c->a + mat_i*bs2;
140     mat_ilen = c->ilen + i;
141     for (k=kstart; k<kend; k++) {
142       if ((tcol=ssmap[a->j[k]])) {
143         *mat_j++ = tcol - 1;
144         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
145         mat_a   += bs2;
146         (*mat_ilen)++;
147       }
148     }
149   }
150 
151   /* Free work space */
152   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
153   ierr = PetscFree(smap);CHKERRQ(ierr);
154   ierr = PetscFree(lens);CHKERRQ(ierr);
155   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
156   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
157 
158   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
159   *B = C;
160   PetscFunctionReturn(0);
161 }
162 
163 #undef __FUNCT__
164 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ"
165 PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
166 {
167   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
168   IS             is1,is2;
169   PetscErrorCode ierr;
170   PetscInt       *vary,*iary,nrows,ncols,i,bs=A->rmap->bs,count;
171   const PetscInt *irow,*icol;
172 
173   PetscFunctionBegin;
174   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
175   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
176   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
177   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
178 
179   /* Verify if the indices corespond to each element in a block
180    and form the IS with compressed IS */
181   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr);
182   iary = vary + a->mbs;
183   ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr);
184   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
185   count = 0;
186   for (i=0; i<a->mbs; i++) {
187     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Index set does not match blocks");
188     if (vary[i]==bs) iary[count++] = i;
189   }
190   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
191 
192   ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr);
193   for (i=0; i<ncols; i++) vary[icol[i]/bs]++;
194   count = 0;
195   for (i=0; i<a->mbs; i++) {
196     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_PLIB,"Internal error in PETSc");
197     if (vary[i]==bs) iary[count++] = i;
198   }
199   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr);
200   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
201   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
202   ierr = PetscFree(vary);CHKERRQ(ierr);
203 
204   ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,cs,scall,B);CHKERRQ(ierr);
205   ISDestroy(is1);
206   ISDestroy(is2);
207   PetscFunctionReturn(0);
208 }
209 
210 #undef __FUNCT__
211 #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ"
212 PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
213 {
214   PetscErrorCode ierr;
215   PetscInt       i;
216 
217   PetscFunctionBegin;
218   if (scall == MAT_INITIAL_MATRIX) {
219     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
220   }
221 
222   for (i=0; i<n; i++) {
223     ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr);
224   }
225   PetscFunctionReturn(0);
226 }
227 
228 
229 /* -------------------------------------------------------*/
230 /* Should check that shapes of vectors and matrices match */
231 /* -------------------------------------------------------*/
232 #include "petscblaslapack.h"
233 
234 #undef __FUNCT__
235 #define __FUNCT__ "MatMult_SeqBAIJ_1"
236 PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
237 {
238   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
239   PetscScalar       *z,sum;
240   const PetscScalar *x;
241   const MatScalar   *v;
242   PetscErrorCode    ierr;
243   PetscInt          mbs,i,*idx,*ii,n,*ridx=PETSC_NULL,nonzerorow=0;
244   PetscTruth        usecprow=a->compressedrow.use;
245 
246   PetscFunctionBegin;
247   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
248   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
249 
250   idx = a->j;
251   v   = a->a;
252   if (usecprow){
253     mbs  = a->compressedrow.nrows;
254     ii   = a->compressedrow.i;
255     ridx = a->compressedrow.rindex;
256   } else {
257     mbs = a->mbs;
258     ii  = a->i;
259   }
260 
261   for (i=0; i<mbs; i++) {
262     n    = ii[1] - ii[0]; ii++;
263     sum  = 0.0;
264     nonzerorow += (n>0);
265     while (n--) sum += *v++ * x[*idx++];
266     if (usecprow){
267       z[ridx[i]] = sum;
268     } else {
269       z[i] = sum;
270     }
271   }
272   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
273   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
274   ierr = PetscLogFlops(2*a->nz - nonzerorow);CHKERRQ(ierr);
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatMult_SeqBAIJ_2"
280 PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
281 {
282   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
283   PetscScalar       *z = 0,sum1,sum2,*zarray;
284   const PetscScalar *x,*xb;
285   PetscScalar       x1,x2;
286   const MatScalar   *v;
287   PetscErrorCode    ierr;
288   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
289   PetscTruth        usecprow=a->compressedrow.use;
290 
291   PetscFunctionBegin;
292   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
293   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
294 
295   idx = a->j;
296   v   = a->a;
297   if (usecprow){
298     mbs  = a->compressedrow.nrows;
299     ii   = a->compressedrow.i;
300     ridx = a->compressedrow.rindex;
301   } else {
302     mbs = a->mbs;
303     ii  = a->i;
304     z   = zarray;
305   }
306 
307   for (i=0; i<mbs; i++) {
308     n  = ii[1] - ii[0]; ii++;
309     sum1 = 0.0; sum2 = 0.0;
310     nonzerorow += (n>0);
311     for (j=0; j<n; j++) {
312       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
313       sum1 += v[0]*x1 + v[2]*x2;
314       sum2 += v[1]*x1 + v[3]*x2;
315       v += 4;
316     }
317     if (usecprow) z = zarray + 2*ridx[i];
318     z[0] = sum1; z[1] = sum2;
319     if (!usecprow) z += 2;
320   }
321   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
322   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
323   ierr = PetscLogFlops(8*a->nz - 2*nonzerorow);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatMult_SeqBAIJ_3"
329 PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
330 {
331   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
332   PetscScalar       *z = 0,sum1,sum2,sum3,x1,x2,x3,*zarray;
333   const PetscScalar *x,*xb;
334   const MatScalar   *v;
335   PetscErrorCode    ierr;
336   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
337   PetscTruth        usecprow=a->compressedrow.use;
338 
339 
340 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
341 #pragma disjoint(*v,*z,*xb)
342 #endif
343 
344   PetscFunctionBegin;
345   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
346   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
347 
348   idx = a->j;
349   v   = a->a;
350   if (usecprow){
351     mbs  = a->compressedrow.nrows;
352     ii   = a->compressedrow.i;
353     ridx = a->compressedrow.rindex;
354   } else {
355     mbs = a->mbs;
356     ii  = a->i;
357     z   = zarray;
358   }
359 
360   for (i=0; i<mbs; i++) {
361     n  = ii[1] - ii[0]; ii++;
362     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
363     nonzerorow += (n>0);
364     for (j=0; j<n; j++) {
365       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
366       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
367       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
368       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
369       v += 9;
370     }
371     if (usecprow) z = zarray + 3*ridx[i];
372     z[0] = sum1; z[1] = sum2; z[2] = sum3;
373     if (!usecprow) z += 3;
374   }
375   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
376   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
377   ierr = PetscLogFlops(18*a->nz - 3*nonzerorow);CHKERRQ(ierr);
378   PetscFunctionReturn(0);
379 }
380 
381 #undef __FUNCT__
382 #define __FUNCT__ "MatMult_SeqBAIJ_4"
383 PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
384 {
385   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
386   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray;
387   const PetscScalar *x,*xb;
388   const MatScalar   *v;
389   PetscErrorCode    ierr;
390   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
391   PetscTruth        usecprow=a->compressedrow.use;
392 
393   PetscFunctionBegin;
394   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
395   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
396 
397   idx = a->j;
398   v   = a->a;
399   if (usecprow){
400     mbs  = a->compressedrow.nrows;
401     ii   = a->compressedrow.i;
402     ridx = a->compressedrow.rindex;
403   } else {
404     mbs = a->mbs;
405     ii  = a->i;
406     z   = zarray;
407   }
408 
409   for (i=0; i<mbs; i++) {
410     n  = ii[1] - ii[0]; ii++;
411     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
412     nonzerorow += (n>0);
413     for (j=0; j<n; j++) {
414       xb = x + 4*(*idx++);
415       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
416       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
417       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
418       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
419       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
420       v += 16;
421     }
422     if (usecprow) z = zarray + 4*ridx[i];
423     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
424     if (!usecprow) z += 4;
425   }
426   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
427   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
428   ierr = PetscLogFlops(32*a->nz - 4*nonzerorow);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 #undef __FUNCT__
433 #define __FUNCT__ "MatMult_SeqBAIJ_5"
434 PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
435 {
436   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
437   PetscScalar       sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*z = 0,*zarray;
438   const PetscScalar *xb,*x;
439   const MatScalar   *v;
440   PetscErrorCode    ierr;
441   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
442   PetscTruth        usecprow=a->compressedrow.use;
443 
444   PetscFunctionBegin;
445   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
446   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
447 
448   idx = a->j;
449   v   = a->a;
450   if (usecprow){
451     mbs  = a->compressedrow.nrows;
452     ii   = a->compressedrow.i;
453     ridx = a->compressedrow.rindex;
454   } else {
455     mbs = a->mbs;
456     ii  = a->i;
457     z   = zarray;
458   }
459 
460   for (i=0; i<mbs; i++) {
461     n  = ii[1] - ii[0]; ii++;
462     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
463     nonzerorow += (n>0);
464     for (j=0; j<n; j++) {
465       xb = x + 5*(*idx++);
466       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
467       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
468       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
469       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
470       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
471       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
472       v += 25;
473     }
474     if (usecprow) z = zarray + 5*ridx[i];
475     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
476     if (!usecprow) z += 5;
477   }
478   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
479   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
480   ierr = PetscLogFlops(50*a->nz - 5*nonzerorow);CHKERRQ(ierr);
481   PetscFunctionReturn(0);
482 }
483 
484 
485 #undef __FUNCT__
486 #define __FUNCT__ "MatMult_SeqBAIJ_6"
487 PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz)
488 {
489   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
490   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,sum5,sum6;
491   const PetscScalar *x,*xb;
492   PetscScalar       x1,x2,x3,x4,x5,x6,*zarray;
493   const MatScalar   *v;
494   PetscErrorCode    ierr;
495   PetscInt          mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
496   PetscTruth        usecprow=a->compressedrow.use;
497 
498   PetscFunctionBegin;
499   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
500   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
501 
502   idx = a->j;
503   v   = a->a;
504   if (usecprow){
505     mbs  = a->compressedrow.nrows;
506     ii   = a->compressedrow.i;
507     ridx = a->compressedrow.rindex;
508   } else {
509     mbs = a->mbs;
510     ii  = a->i;
511     z   = zarray;
512   }
513 
514   for (i=0; i<mbs; i++) {
515     n  = ii[1] - ii[0]; ii++;
516     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0;
517     nonzerorow += (n>0);
518     for (j=0; j<n; j++) {
519       xb = x + 6*(*idx++);
520       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
521       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
522       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
523       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
524       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
525       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
526       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
527       v += 36;
528     }
529     if (usecprow) z = zarray + 6*ridx[i];
530     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
531     if (!usecprow) z += 6;
532   }
533 
534   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
535   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
536   ierr = PetscLogFlops(72*a->nz - 6*nonzerorow);CHKERRQ(ierr);
537   PetscFunctionReturn(0);
538 }
539 #undef __FUNCT__
540 #define __FUNCT__ "MatMult_SeqBAIJ_7"
541 PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
542 {
543   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
544   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
545   const PetscScalar *x,*xb;
546   PetscScalar       x1,x2,x3,x4,x5,x6,x7,*zarray;
547   const MatScalar   *v;
548   PetscErrorCode    ierr;
549   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
550   PetscTruth        usecprow=a->compressedrow.use;
551 
552   PetscFunctionBegin;
553   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
554   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
555 
556   idx = a->j;
557   v   = a->a;
558   if (usecprow){
559     mbs    = a->compressedrow.nrows;
560     ii     = a->compressedrow.i;
561     ridx = a->compressedrow.rindex;
562   } else {
563     mbs = a->mbs;
564     ii  = a->i;
565     z   = zarray;
566   }
567 
568   for (i=0; i<mbs; i++) {
569     n  = ii[1] - ii[0]; ii++;
570     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
571     nonzerorow += (n>0);
572     for (j=0; j<n; j++) {
573       xb = x + 7*(*idx++);
574       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
575       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
576       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
577       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
578       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
579       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
580       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
581       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
582       v += 49;
583     }
584     if (usecprow) z = zarray + 7*ridx[i];
585     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
586     if (!usecprow) z += 7;
587   }
588 
589   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
590   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
591   ierr = PetscLogFlops(98*a->nz - 7*nonzerorow);CHKERRQ(ierr);
592   PetscFunctionReturn(0);
593 }
594 
595 /*
596     This will not work with MatScalar == float because it calls the BLAS
597 */
598 #undef __FUNCT__
599 #define __FUNCT__ "MatMult_SeqBAIJ_N"
600 PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
601 {
602   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
603   PetscScalar    *x,*z = 0,*xb,*work,*workt,*zarray;
604   MatScalar      *v;
605   PetscErrorCode ierr;
606   PetscInt       mbs=a->mbs,i,*idx,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2;
607   PetscInt       ncols,k,*ridx=PETSC_NULL,nonzerorow=0;
608   PetscTruth     usecprow=a->compressedrow.use;
609 
610   PetscFunctionBegin;
611   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
612   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
613 
614   idx = a->j;
615   v   = a->a;
616   if (usecprow){
617     mbs  = a->compressedrow.nrows;
618     ii   = a->compressedrow.i;
619     ridx = a->compressedrow.rindex;
620   } else {
621     mbs = a->mbs;
622     ii  = a->i;
623     z   = zarray;
624   }
625 
626   if (!a->mult_work) {
627     k    = PetscMax(A->rmap->n,A->cmap->n);
628     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
629   }
630   work = a->mult_work;
631   for (i=0; i<mbs; i++) {
632     n     = ii[1] - ii[0]; ii++;
633     ncols = n*bs;
634     workt = work;
635     nonzerorow += (n>0);
636     for (j=0; j<n; j++) {
637       xb = x + bs*(*idx++);
638       for (k=0; k<bs; k++) workt[k] = xb[k];
639       workt += bs;
640     }
641     if (usecprow) z = zarray + bs*ridx[i];
642     Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z);
643     /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */
644     v += n*bs2;
645     if (!usecprow) z += bs;
646   }
647   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
648   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
649   ierr = PetscLogFlops(2*a->nz*bs2 - bs*nonzerorow);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 extern PetscErrorCode VecCopy_Seq(Vec,Vec);
654 #undef __FUNCT__
655 #define __FUNCT__ "MatMultAdd_SeqBAIJ_1"
656 PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
657 {
658   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
659   PetscScalar    *x,*y = 0,*z = 0,sum,*yarray,*zarray;
660   MatScalar      *v;
661   PetscErrorCode ierr;
662   PetscInt       mbs=a->mbs,i,*idx,*ii,n,*ridx=PETSC_NULL;
663   PetscTruth     usecprow=a->compressedrow.use;
664 
665   PetscFunctionBegin;
666   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
667   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
668   if (zz != yy) {
669     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
670   } else {
671     zarray = yarray;
672   }
673 
674   idx = a->j;
675   v   = a->a;
676   if (usecprow){
677     if (zz != yy){
678       ierr = PetscMemcpy(zarray,yarray,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
679     }
680     mbs  = a->compressedrow.nrows;
681     ii   = a->compressedrow.i;
682     ridx = a->compressedrow.rindex;
683   } else {
684     ii  = a->i;
685     y   = yarray;
686     z   = zarray;
687   }
688 
689   for (i=0; i<mbs; i++) {
690     n    = ii[1] - ii[0]; ii++;
691     if (usecprow){
692       z = zarray + ridx[i];
693       y = yarray + ridx[i];
694     }
695     sum = y[0];
696     while (n--) sum += *v++ * x[*idx++];
697     z[0] = sum;
698     if (!usecprow){
699       z++; y++;
700     }
701   }
702   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
703   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
704   if (zz != yy) {
705     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
706   }
707   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMultAdd_SeqBAIJ_2"
713 PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
714 {
715   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
716   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2;
717   PetscScalar    x1,x2,*yarray,*zarray;
718   MatScalar      *v;
719   PetscErrorCode ierr;
720   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
721   PetscTruth     usecprow=a->compressedrow.use;
722 
723   PetscFunctionBegin;
724   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
725   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
726   if (zz != yy) {
727     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
728   } else {
729     zarray = yarray;
730   }
731 
732   idx = a->j;
733   v   = a->a;
734   if (usecprow){
735     if (zz != yy){
736       ierr = PetscMemcpy(zarray,yarray,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
737     }
738     mbs  = a->compressedrow.nrows;
739     ii   = a->compressedrow.i;
740     ridx = a->compressedrow.rindex;
741     if (zz != yy){
742       ierr = PetscMemcpy(zarray,yarray,a->mbs*sizeof(PetscScalar));CHKERRQ(ierr);
743     }
744   } else {
745     ii  = a->i;
746     y   = yarray;
747     z   = zarray;
748   }
749 
750   for (i=0; i<mbs; i++) {
751     n  = ii[1] - ii[0]; ii++;
752     if (usecprow){
753       z = zarray + 2*ridx[i];
754       y = yarray + 2*ridx[i];
755     }
756     sum1 = y[0]; sum2 = y[1];
757     for (j=0; j<n; j++) {
758       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
759       sum1 += v[0]*x1 + v[2]*x2;
760       sum2 += v[1]*x1 + v[3]*x2;
761       v += 4;
762     }
763     z[0] = sum1; z[1] = sum2;
764     if (!usecprow){
765       z += 2; y += 2;
766     }
767   }
768   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
769   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
770   if (zz != yy) {
771     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
772   }
773   ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr);
774   PetscFunctionReturn(0);
775 }
776 
777 #undef __FUNCT__
778 #define __FUNCT__ "MatMultAdd_SeqBAIJ_3"
779 PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
780 {
781   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
782   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*yarray,*zarray;
783   MatScalar      *v;
784   PetscErrorCode ierr;
785   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
786   PetscTruth     usecprow=a->compressedrow.use;
787 
788   PetscFunctionBegin;
789   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
790   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
791   if (zz != yy) {
792     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
793   } else {
794     zarray = yarray;
795   }
796 
797   idx = a->j;
798   v   = a->a;
799   if (usecprow){
800     if (zz != yy){
801       ierr = PetscMemcpy(zarray,yarray,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
802     }
803     mbs  = a->compressedrow.nrows;
804     ii   = a->compressedrow.i;
805     ridx = a->compressedrow.rindex;
806   } else {
807     ii  = a->i;
808     y   = yarray;
809     z   = zarray;
810   }
811 
812   for (i=0; i<mbs; i++) {
813     n  = ii[1] - ii[0]; ii++;
814     if (usecprow){
815       z = zarray + 3*ridx[i];
816       y = yarray + 3*ridx[i];
817     }
818     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
819     for (j=0; j<n; j++) {
820       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
821       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
822       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
823       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
824       v += 9;
825     }
826     z[0] = sum1; z[1] = sum2; z[2] = sum3;
827     if (!usecprow){
828       z += 3; y += 3;
829     }
830   }
831   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
832   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
833   if (zz != yy) {
834     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
835   }
836   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
837   PetscFunctionReturn(0);
838 }
839 
840 #undef __FUNCT__
841 #define __FUNCT__ "MatMultAdd_SeqBAIJ_4"
842 PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
843 {
844   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
845   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray;
846   MatScalar      *v;
847   PetscErrorCode ierr;
848   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
849   PetscTruth     usecprow=a->compressedrow.use;
850 
851   PetscFunctionBegin;
852   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
853   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
854   if (zz != yy) {
855     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
856   } else {
857     zarray = yarray;
858   }
859 
860   idx   = a->j;
861   v     = a->a;
862   if (usecprow){
863     if (zz != yy){
864       ierr = PetscMemcpy(zarray,yarray,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
865     }
866     mbs  = a->compressedrow.nrows;
867     ii   = a->compressedrow.i;
868     ridx = a->compressedrow.rindex;
869   } else {
870     ii  = a->i;
871     y   = yarray;
872     z   = zarray;
873   }
874 
875   for (i=0; i<mbs; i++) {
876     n  = ii[1] - ii[0]; ii++;
877     if (usecprow){
878       z = zarray + 4*ridx[i];
879       y = yarray + 4*ridx[i];
880     }
881     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
882     for (j=0; j<n; j++) {
883       xb = x + 4*(*idx++);
884       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
885       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
886       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
887       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
888       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
889       v += 16;
890     }
891     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
892     if (!usecprow){
893       z += 4; y += 4;
894     }
895   }
896   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
897   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
898   if (zz != yy) {
899     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
900   }
901   ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr);
902   PetscFunctionReturn(0);
903 }
904 
905 #undef __FUNCT__
906 #define __FUNCT__ "MatMultAdd_SeqBAIJ_5"
907 PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
908 {
909   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
910   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
911   PetscScalar    *yarray,*zarray;
912   MatScalar      *v;
913   PetscErrorCode ierr;
914   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
915   PetscTruth     usecprow=a->compressedrow.use;
916 
917   PetscFunctionBegin;
918   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
919   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
920   if (zz != yy) {
921     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
922   } else {
923     zarray = yarray;
924   }
925 
926   idx = a->j;
927   v   = a->a;
928   if (usecprow){
929     if (zz != yy){
930       ierr = PetscMemcpy(zarray,yarray,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
931     }
932     mbs  = a->compressedrow.nrows;
933     ii   = a->compressedrow.i;
934     ridx = a->compressedrow.rindex;
935   } else {
936     ii  = a->i;
937     y   = yarray;
938     z   = zarray;
939   }
940 
941   for (i=0; i<mbs; i++) {
942     n  = ii[1] - ii[0]; ii++;
943     if (usecprow){
944       z = zarray + 5*ridx[i];
945       y = yarray + 5*ridx[i];
946     }
947     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
948     for (j=0; j<n; j++) {
949       xb = x + 5*(*idx++);
950       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
951       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
952       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
953       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
954       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
955       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
956       v += 25;
957     }
958     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
959     if (!usecprow){
960       z += 5; y += 5;
961     }
962   }
963   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
964   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
965   if (zz != yy) {
966     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
967   }
968   ierr = PetscLogFlops(50*a->nz);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 #undef __FUNCT__
972 #define __FUNCT__ "MatMultAdd_SeqBAIJ_6"
973 PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
974 {
975   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
976   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
977   PetscScalar    x1,x2,x3,x4,x5,x6,*yarray,*zarray;
978   MatScalar      *v;
979   PetscErrorCode ierr;
980   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
981   PetscTruth     usecprow=a->compressedrow.use;
982 
983   PetscFunctionBegin;
984   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
985   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
986   if (zz != yy) {
987     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
988   } else {
989     zarray = yarray;
990   }
991 
992   idx = a->j;
993   v   = a->a;
994   if (usecprow){
995     if (zz != yy){
996       ierr = PetscMemcpy(zarray,yarray,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
997     }
998     mbs  = a->compressedrow.nrows;
999     ii   = a->compressedrow.i;
1000     ridx = a->compressedrow.rindex;
1001   } else {
1002     ii  = a->i;
1003     y   = yarray;
1004     z   = zarray;
1005   }
1006 
1007   for (i=0; i<mbs; i++) {
1008     n  = ii[1] - ii[0]; ii++;
1009     if (usecprow){
1010       z = zarray + 6*ridx[i];
1011       y = yarray + 6*ridx[i];
1012     }
1013     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5];
1014     for (j=0; j<n; j++) {
1015       xb = x + 6*(*idx++);
1016       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
1017       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
1018       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
1019       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
1020       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
1021       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
1022       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
1023       v += 36;
1024     }
1025     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
1026     if (!usecprow){
1027       z += 6; y += 6;
1028     }
1029   }
1030   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1031   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
1032   if (zz != yy) {
1033     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1034   }
1035   ierr = PetscLogFlops(72*a->nz);CHKERRQ(ierr);
1036   PetscFunctionReturn(0);
1037 }
1038 
1039 #undef __FUNCT__
1040 #define __FUNCT__ "MatMultAdd_SeqBAIJ_7"
1041 PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1042 {
1043   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1044   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1045   PetscScalar    x1,x2,x3,x4,x5,x6,x7,*yarray,*zarray;
1046   MatScalar      *v;
1047   PetscErrorCode ierr;
1048   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
1049   PetscTruth     usecprow=a->compressedrow.use;
1050 
1051   PetscFunctionBegin;
1052   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1053   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
1054   if (zz != yy) {
1055     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
1056   } else {
1057     zarray = yarray;
1058   }
1059 
1060   idx = a->j;
1061   v   = a->a;
1062   if (usecprow){
1063     if (zz != yy){
1064       ierr = PetscMemcpy(zarray,yarray,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1065     }
1066     mbs  = a->compressedrow.nrows;
1067     ii   = a->compressedrow.i;
1068     ridx = a->compressedrow.rindex;
1069   } else {
1070     ii  = a->i;
1071     y   = yarray;
1072     z   = zarray;
1073   }
1074 
1075   for (i=0; i<mbs; i++) {
1076     n  = ii[1] - ii[0]; ii++;
1077     if (usecprow){
1078       z = zarray + 7*ridx[i];
1079       y = yarray + 7*ridx[i];
1080     }
1081     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
1082     for (j=0; j<n; j++) {
1083       xb = x + 7*(*idx++);
1084       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1085       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1086       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1087       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1088       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1089       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1090       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1091       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1092       v += 49;
1093     }
1094     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1095     if (!usecprow){
1096       z += 7; y += 7;
1097     }
1098   }
1099   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1100   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
1101   if (zz != yy) {
1102     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1103   }
1104   ierr = PetscLogFlops(98*a->nz);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatMultAdd_SeqBAIJ_N"
1110 PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1111 {
1112   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1113   PetscScalar    *x,*z = 0,*xb,*work,*workt,*zarray;
1114   MatScalar      *v;
1115   PetscErrorCode ierr;
1116   PetscInt       mbs,i,*idx,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2;
1117   PetscInt       ncols,k,*ridx=PETSC_NULL;
1118   PetscTruth     usecprow=a->compressedrow.use;
1119 
1120   PetscFunctionBegin;
1121   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
1122   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1123   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
1124 
1125   idx = a->j;
1126   v   = a->a;
1127   if (usecprow){
1128     mbs    = a->compressedrow.nrows;
1129     ii     = a->compressedrow.i;
1130     ridx = a->compressedrow.rindex;
1131   } else {
1132     mbs = a->mbs;
1133     ii  = a->i;
1134     z   = zarray;
1135   }
1136 
1137   if (!a->mult_work) {
1138     k    = PetscMax(A->rmap->n,A->cmap->n);
1139     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1140   }
1141   work = a->mult_work;
1142   for (i=0; i<mbs; i++) {
1143     n     = ii[1] - ii[0]; ii++;
1144     ncols = n*bs;
1145     workt = work;
1146     for (j=0; j<n; j++) {
1147       xb = x + bs*(*idx++);
1148       for (k=0; k<bs; k++) workt[k] = xb[k];
1149       workt += bs;
1150     }
1151     if (usecprow) z = zarray + bs*ridx[i];
1152     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
1153     /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */
1154     v += n*bs2;
1155     if (!usecprow){
1156       z += bs;
1157     }
1158   }
1159   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1160   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1161   ierr = PetscLogFlops(2*a->nz*bs2);CHKERRQ(ierr);
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 #undef __FUNCT__
1166 #define __FUNCT__ "MatMultTranspose_SeqBAIJ"
1167 PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz)
1168 {
1169   PetscScalar    zero = 0.0;
1170   PetscErrorCode ierr;
1171 
1172   PetscFunctionBegin;
1173   ierr = VecSet(zz,zero);CHKERRQ(ierr);
1174   ierr = MatMultTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 #undef __FUNCT__
1179 #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ"
1180 PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1181 
1182 {
1183   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1184   PetscScalar       *zb,*x,*z,*xb = 0,x1,x2,x3,x4,x5;
1185   MatScalar         *v;
1186   PetscErrorCode    ierr;
1187   PetscInt          mbs,i,*idx,*ii,rval,bs=A->rmap->bs,j,n,bs2=a->bs2,*ib,*ridx=PETSC_NULL;
1188   Mat_CompressedRow cprow = a->compressedrow;
1189   PetscTruth        usecprow=cprow.use;
1190 
1191   PetscFunctionBegin;
1192   if (yy != zz) { ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); }
1193   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1194   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
1195 
1196   idx = a->j;
1197   v   = a->a;
1198   if (usecprow){
1199     mbs  = cprow.nrows;
1200     ii   = cprow.i;
1201     ridx = cprow.rindex;
1202   } else {
1203     mbs=a->mbs;
1204     ii = a->i;
1205     xb = x;
1206   }
1207 
1208   switch (bs) {
1209   case 1:
1210     for (i=0; i<mbs; i++) {
1211       if (usecprow) xb = x + ridx[i];
1212       x1 = xb[0];
1213       ib = idx + ii[0];
1214       n  = ii[1] - ii[0]; ii++;
1215       for (j=0; j<n; j++) {
1216         rval    = ib[j];
1217         z[rval] += *v * x1;
1218         v++;
1219       }
1220       if (!usecprow) xb++;
1221     }
1222     break;
1223   case 2:
1224     for (i=0; i<mbs; i++) {
1225       if (usecprow) xb = x + 2*ridx[i];
1226       x1 = xb[0]; x2 = xb[1];
1227       ib = idx + ii[0];
1228       n  = ii[1] - ii[0]; ii++;
1229       for (j=0; j<n; j++) {
1230         rval      = ib[j]*2;
1231         z[rval++] += v[0]*x1 + v[1]*x2;
1232         z[rval++] += v[2]*x1 + v[3]*x2;
1233         v  += 4;
1234       }
1235       if (!usecprow) xb += 2;
1236     }
1237     break;
1238   case 3:
1239     for (i=0; i<mbs; i++) {
1240       if (usecprow) xb = x + 3*ridx[i];
1241       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1242       ib = idx + ii[0];
1243       n  = ii[1] - ii[0]; ii++;
1244       for (j=0; j<n; j++) {
1245         rval      = ib[j]*3;
1246         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1247         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1248         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1249         v  += 9;
1250       }
1251       if (!usecprow) xb += 3;
1252     }
1253     break;
1254   case 4:
1255     for (i=0; i<mbs; i++) {
1256       if (usecprow) xb = x + 4*ridx[i];
1257       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1258       ib = idx + ii[0];
1259       n  = ii[1] - ii[0]; ii++;
1260       for (j=0; j<n; j++) {
1261         rval      = ib[j]*4;
1262         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1263         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1264         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1265         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1266         v  += 16;
1267       }
1268       if (!usecprow) xb += 4;
1269     }
1270     break;
1271   case 5:
1272     for (i=0; i<mbs; i++) {
1273       if (usecprow) xb = x + 5*ridx[i];
1274       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1275       x4 = xb[3]; x5 = xb[4];
1276       ib = idx + ii[0];
1277       n  = ii[1] - ii[0]; ii++;
1278       for (j=0; j<n; j++) {
1279         rval      = ib[j]*5;
1280         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1281         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1282         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1283         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1284         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1285         v  += 25;
1286       }
1287       if (!usecprow) xb += 5;
1288     }
1289     break;
1290   default: {      /* block sizes larger then 5 by 5 are handled by BLAS */
1291       PetscInt     ncols,k;
1292       PetscScalar  *work,*workt,*xtmp;
1293 
1294       if (!a->mult_work) {
1295         k = PetscMax(A->rmap->n,A->cmap->n);
1296         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1297       }
1298       work = a->mult_work;
1299       xtmp = x;
1300       for (i=0; i<mbs; i++) {
1301         n     = ii[1] - ii[0]; ii++;
1302         ncols = n*bs;
1303         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1304         if (usecprow) {
1305           xtmp = x + bs*ridx[i];
1306         }
1307         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work);
1308         /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */
1309         v += n*bs2;
1310         if (!usecprow) xtmp += bs;
1311         workt = work;
1312         for (j=0; j<n; j++) {
1313           zb = z + bs*(*idx++);
1314           for (k=0; k<bs; k++) zb[k] += workt[k] ;
1315           workt += bs;
1316         }
1317       }
1318     }
1319   }
1320   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1321   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
1322   ierr = PetscLogFlops(2*a->nz*a->bs2);CHKERRQ(ierr);
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatScale_SeqBAIJ"
1328 PetscErrorCode MatScale_SeqBAIJ(Mat inA,PetscScalar alpha)
1329 {
1330   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1331   PetscInt       totalnz = a->bs2*a->nz;
1332   PetscScalar    oalpha = alpha;
1333   PetscErrorCode ierr;
1334   PetscBLASInt   one = 1,tnz = PetscBLASIntCast(totalnz);
1335 
1336   PetscFunctionBegin;
1337   BLASscal_(&tnz,&oalpha,a->a,&one);
1338   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 #undef __FUNCT__
1343 #define __FUNCT__ "MatNorm_SeqBAIJ"
1344 PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm)
1345 {
1346   PetscErrorCode ierr;
1347   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1348   MatScalar      *v = a->a;
1349   PetscReal      sum = 0.0;
1350   PetscInt       i,j,k,bs=A->rmap->bs,nz=a->nz,bs2=a->bs2,k1;
1351 
1352   PetscFunctionBegin;
1353   if (type == NORM_FROBENIUS) {
1354     for (i=0; i< bs2*nz; i++) {
1355 #if defined(PETSC_USE_COMPLEX)
1356       sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1357 #else
1358       sum += (*v)*(*v); v++;
1359 #endif
1360     }
1361     *norm = sqrt(sum);
1362   } else if (type == NORM_1) { /* maximum column sum */
1363     PetscReal *tmp;
1364     PetscInt  *bcol = a->j;
1365     ierr = PetscMalloc((A->cmap->n+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1366     ierr = PetscMemzero(tmp,A->cmap->n*sizeof(PetscReal));CHKERRQ(ierr);
1367     for (i=0; i<nz; i++){
1368       for (j=0; j<bs; j++){
1369         k1 = bs*(*bcol) + j; /* column index */
1370         for (k=0; k<bs; k++){
1371           tmp[k1] += PetscAbsScalar(*v); v++;
1372         }
1373       }
1374       bcol++;
1375     }
1376     *norm = 0.0;
1377     for (j=0; j<A->cmap->n; j++) {
1378       if (tmp[j] > *norm) *norm = tmp[j];
1379     }
1380     ierr = PetscFree(tmp);CHKERRQ(ierr);
1381   } else if (type == NORM_INFINITY) { /* maximum row sum */
1382     *norm = 0.0;
1383     for (k=0; k<bs; k++) {
1384       for (j=0; j<a->mbs; j++) {
1385         v = a->a + bs2*a->i[j] + k;
1386         sum = 0.0;
1387         for (i=0; i<a->i[j+1]-a->i[j]; i++) {
1388           for (k1=0; k1<bs; k1++){
1389             sum += PetscAbsScalar(*v);
1390             v   += bs;
1391           }
1392         }
1393         if (sum > *norm) *norm = sum;
1394       }
1395     }
1396   } else {
1397     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1398   }
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatEqual_SeqBAIJ"
1405 PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg)
1406 {
1407   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data;
1408   PetscErrorCode ierr;
1409 
1410   PetscFunctionBegin;
1411   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1412   if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1413     *flg = PETSC_FALSE;
1414     PetscFunctionReturn(0);
1415   }
1416 
1417   /* if the a->i are the same */
1418   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1419   if (!*flg) {
1420     PetscFunctionReturn(0);
1421   }
1422 
1423   /* if a->j are the same */
1424   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1425   if (!*flg) {
1426     PetscFunctionReturn(0);
1427   }
1428   /* if a->a are the same */
1429   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(B->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1430   PetscFunctionReturn(0);
1431 
1432 }
1433 
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatGetDiagonal_SeqBAIJ"
1436 PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1437 {
1438   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1439   PetscErrorCode ierr;
1440   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1441   PetscScalar    *x,zero = 0.0;
1442   MatScalar      *aa,*aa_j;
1443 
1444   PetscFunctionBegin;
1445   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1446   bs   = A->rmap->bs;
1447   aa   = a->a;
1448   ai   = a->i;
1449   aj   = a->j;
1450   ambs = a->mbs;
1451   bs2  = a->bs2;
1452 
1453   ierr = VecSet(v,zero);CHKERRQ(ierr);
1454   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1455   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1456   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1457   for (i=0; i<ambs; i++) {
1458     for (j=ai[i]; j<ai[i+1]; j++) {
1459       if (aj[j] == i) {
1460         row  = i*bs;
1461         aa_j = aa+j*bs2;
1462         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1463         break;
1464       }
1465     }
1466   }
1467   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatDiagonalScale_SeqBAIJ"
1473 PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1474 {
1475   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1476   PetscScalar    *l,*r,x,*li,*ri;
1477   MatScalar      *aa,*v;
1478   PetscErrorCode ierr;
1479   PetscInt       i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1480 
1481   PetscFunctionBegin;
1482   ai  = a->i;
1483   aj  = a->j;
1484   aa  = a->a;
1485   m   = A->rmap->n;
1486   n   = A->cmap->n;
1487   bs  = A->rmap->bs;
1488   mbs = a->mbs;
1489   bs2 = a->bs2;
1490   if (ll) {
1491     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
1492     ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1493     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1494     for (i=0; i<mbs; i++) { /* for each block row */
1495       M  = ai[i+1] - ai[i];
1496       li = l + i*bs;
1497       v  = aa + bs2*ai[i];
1498       for (j=0; j<M; j++) { /* for each block */
1499         for (k=0; k<bs2; k++) {
1500           (*v++) *= li[k%bs];
1501         }
1502       }
1503     }
1504     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1505     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1506   }
1507 
1508   if (rr) {
1509     ierr = VecGetArray(rr,&r);CHKERRQ(ierr);
1510     ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr);
1511     if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
1512     for (i=0; i<mbs; i++) { /* for each block row */
1513       M  = ai[i+1] - ai[i];
1514       v  = aa + bs2*ai[i];
1515       for (j=0; j<M; j++) { /* for each block */
1516         ri = r + bs*aj[ai[i]+j];
1517         for (k=0; k<bs; k++) {
1518           x = ri[k];
1519           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
1520         }
1521       }
1522     }
1523     ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr);
1524     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1525   }
1526   PetscFunctionReturn(0);
1527 }
1528 
1529 
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatGetInfo_SeqBAIJ"
1532 PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1533 {
1534   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1535 
1536   PetscFunctionBegin;
1537   info->block_size     = a->bs2;
1538   info->nz_allocated   = a->maxnz;
1539   info->nz_used        = a->bs2*a->nz;
1540   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1541   info->assemblies   = A->num_ass;
1542   info->mallocs      = a->reallocs;
1543   info->memory       = ((PetscObject)A)->mem;
1544   if (A->factor) {
1545     info->fill_ratio_given  = A->info.fill_ratio_given;
1546     info->fill_ratio_needed = A->info.fill_ratio_needed;
1547     info->factor_mallocs    = A->info.factor_mallocs;
1548   } else {
1549     info->fill_ratio_given  = 0;
1550     info->fill_ratio_needed = 0;
1551     info->factor_mallocs    = 0;
1552   }
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 
1557 #undef __FUNCT__
1558 #define __FUNCT__ "MatZeroEntries_SeqBAIJ"
1559 PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A)
1560 {
1561   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1562   PetscErrorCode ierr;
1563 
1564   PetscFunctionBegin;
1565   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
1566   PetscFunctionReturn(0);
1567 }
1568