xref: /petsc/src/mat/impls/baij/seq/baij2.c (revision ce0a2cd1da0658c2b28aad1be2e2c8e41567bece)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/seq/baij.h"
4 #include "src/inline/spops.h"
5 #include "src/inline/ilu.h"
6 #include "petscbt.h"
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ"
10 PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
11 {
12   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
13   PetscErrorCode ierr;
14   PetscInt       row,i,j,k,l,m,n,*idx,*nidx,isz,val,ival;
15   PetscInt       start,end,*ai,*aj,bs,*nidx2;
16   PetscBT        table;
17 
18   PetscFunctionBegin;
19   m     = a->mbs;
20   ai    = a->i;
21   aj    = a->j;
22   bs    = A->rmap.bs;
23 
24   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
25 
26   ierr = PetscBTCreate(m,table);CHKERRQ(ierr);
27   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr);
28   ierr = PetscMalloc((A->rmap.N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr);
29 
30   for (i=0; i<is_max; i++) {
31     /* Initialise the two local arrays */
32     isz  = 0;
33     ierr = PetscBTMemzero(m,table);CHKERRQ(ierr);
34 
35     /* Extract the indices, assume there can be duplicate entries */
36     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
37     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
38 
39     /* Enter these into the temp arrays i.e mark table[row], enter row into new index */
40     for (j=0; j<n ; ++j){
41       ival = idx[j]/bs; /* convert the indices into block indices */
42       if (ival>=m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
43       if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;}
44     }
45     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
46     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
47 
48     k = 0;
49     for (j=0; j<ov; j++){ /* for each overlap*/
50       n = isz;
51       for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */
52         row   = nidx[k];
53         start = ai[row];
54         end   = ai[row+1];
55         for (l = start; l<end ; l++){
56           val = aj[l];
57           if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;}
58         }
59       }
60     }
61     /* expand the Index Set */
62     for (j=0; j<isz; j++) {
63       for (k=0; k<bs; k++)
64         nidx2[j*bs+k] = nidx[j]*bs+k;
65     }
66     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
67   }
68   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
69   ierr = PetscFree(nidx);CHKERRQ(ierr);
70   ierr = PetscFree(nidx2);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private"
76 PetscErrorCode MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
77 {
78   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*c;
79   PetscErrorCode ierr;
80   PetscInt       *smap,i,k,kstart,kend,oldcols = a->nbs,*lens;
81   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
82   PetscInt       *irow,*icol,nrows,ncols,*ssmap,bs=A->rmap.bs,bs2=a->bs2;
83   PetscInt       *aj = a->j,*ai = a->i;
84   MatScalar      *mat_a;
85   Mat            C;
86   PetscTruth     flag;
87 
88   PetscFunctionBegin;
89   ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr);
90   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
91 
92   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
93   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
94   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
95   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
96 
97   ierr = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr);
98   ssmap = smap;
99   ierr = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
100   ierr  = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr);
101   for (i=0; i<ncols; i++) smap[icol[i]] = i+1;
102   /* determine lens of each row */
103   for (i=0; i<nrows; i++) {
104     kstart  = ai[irow[i]];
105     kend    = kstart + a->ilen[irow[i]];
106     lens[i] = 0;
107       for (k=kstart; k<kend; k++) {
108         if (ssmap[aj[k]]) {
109           lens[i]++;
110         }
111       }
112     }
113   /* Create and fill new matrix */
114   if (scall == MAT_REUSE_MATRIX) {
115     c = (Mat_SeqBAIJ *)((*B)->data);
116 
117     if (c->mbs!=nrows || c->nbs!=ncols || (*B)->rmap.bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
118     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr);
119     if (!flag) {
120       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
121     }
122     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr);
123     C = *B;
124   } else {
125     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
126     ierr = MatSetSizes(C,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
127     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
128     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,lens);CHKERRQ(ierr);
129   }
130   c = (Mat_SeqBAIJ *)(C->data);
131   for (i=0; i<nrows; i++) {
132     row    = irow[i];
133     kstart = ai[row];
134     kend   = kstart + a->ilen[row];
135     mat_i  = c->i[i];
136     mat_j  = c->j + mat_i;
137     mat_a  = c->a + mat_i*bs2;
138     mat_ilen = c->ilen + i;
139     for (k=kstart; k<kend; k++) {
140       if ((tcol=ssmap[a->j[k]])) {
141         *mat_j++ = tcol - 1;
142         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
143         mat_a   += bs2;
144         (*mat_ilen)++;
145       }
146     }
147   }
148 
149   /* Free work space */
150   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
151   ierr = PetscFree(smap);CHKERRQ(ierr);
152   ierr = PetscFree(lens);CHKERRQ(ierr);
153   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
154   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
155 
156   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
157   *B = C;
158   PetscFunctionReturn(0);
159 }
160 
161 #undef __FUNCT__
162 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ"
163 PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
164 {
165   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
166   IS             is1,is2;
167   PetscErrorCode ierr;
168   PetscInt       *vary,*iary,*irow,*icol,nrows,ncols,i,bs=A->rmap.bs,count;
169 
170   PetscFunctionBegin;
171   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
172   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
173   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
174   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
175 
176   /* Verify if the indices corespond to each element in a block
177    and form the IS with compressed IS */
178   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr);
179   iary = vary + a->mbs;
180   ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr);
181   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
182   count = 0;
183   for (i=0; i<a->mbs; i++) {
184     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Index set does not match blocks");
185     if (vary[i]==bs) iary[count++] = i;
186   }
187   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
188 
189   ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr);
190   for (i=0; i<ncols; i++) vary[icol[i]/bs]++;
191   count = 0;
192   for (i=0; i<a->mbs; i++) {
193     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_PLIB,"Internal error in PETSc");
194     if (vary[i]==bs) iary[count++] = i;
195   }
196   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr);
197   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
198   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
199   ierr = PetscFree(vary);CHKERRQ(ierr);
200 
201   ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,cs,scall,B);CHKERRQ(ierr);
202   ISDestroy(is1);
203   ISDestroy(is2);
204   PetscFunctionReturn(0);
205 }
206 
207 #undef __FUNCT__
208 #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ"
209 PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
210 {
211   PetscErrorCode ierr;
212   PetscInt       i;
213 
214   PetscFunctionBegin;
215   if (scall == MAT_INITIAL_MATRIX) {
216     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
217   }
218 
219   for (i=0; i<n; i++) {
220     ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr);
221   }
222   PetscFunctionReturn(0);
223 }
224 
225 
226 /* -------------------------------------------------------*/
227 /* Should check that shapes of vectors and matrices match */
228 /* -------------------------------------------------------*/
229 #include "petscblaslapack.h"
230 
231 #undef __FUNCT__
232 #define __FUNCT__ "MatMult_SeqBAIJ_1"
233 PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
234 {
235   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
236   PetscScalar       *z,sum;
237   const PetscScalar *x;
238   const MatScalar   *v;
239   PetscErrorCode    ierr;
240   PetscInt          mbs,i,*idx,*ii,n,*ridx=PETSC_NULL,nonzerorow=0;
241   PetscTruth        usecprow=a->compressedrow.use;
242 
243   PetscFunctionBegin;
244   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
245   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
246 
247   idx = a->j;
248   v   = a->a;
249   if (usecprow){
250     mbs  = a->compressedrow.nrows;
251     ii   = a->compressedrow.i;
252     ridx = a->compressedrow.rindex;
253   } else {
254     mbs = a->mbs;
255     ii  = a->i;
256   }
257 
258   for (i=0; i<mbs; i++) {
259     n    = ii[1] - ii[0]; ii++;
260     sum  = 0.0;
261     nonzerorow += (n>0);
262     while (n--) sum += *v++ * x[*idx++];
263     if (usecprow){
264       z[ridx[i]] = sum;
265     } else {
266       z[i] = sum;
267     }
268   }
269   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
270   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
271   ierr = PetscLogFlops(2*a->nz - nonzerorow);CHKERRQ(ierr);
272   PetscFunctionReturn(0);
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatMult_SeqBAIJ_2"
277 PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
278 {
279   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
280   PetscScalar       *z = 0,sum1,sum2,*zarray;
281   const PetscScalar *x,*xb;
282   PetscScalar       x1,x2;
283   const MatScalar   *v;
284   PetscErrorCode    ierr;
285   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
286   PetscTruth        usecprow=a->compressedrow.use;
287 
288   PetscFunctionBegin;
289   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
290   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
291 
292   idx = a->j;
293   v   = a->a;
294   if (usecprow){
295     mbs  = a->compressedrow.nrows;
296     ii   = a->compressedrow.i;
297     ridx = a->compressedrow.rindex;
298   } else {
299     mbs = a->mbs;
300     ii  = a->i;
301     z   = zarray;
302   }
303 
304   for (i=0; i<mbs; i++) {
305     n  = ii[1] - ii[0]; ii++;
306     sum1 = 0.0; sum2 = 0.0;
307     nonzerorow += (n>0);
308     for (j=0; j<n; j++) {
309       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
310       sum1 += v[0]*x1 + v[2]*x2;
311       sum2 += v[1]*x1 + v[3]*x2;
312       v += 4;
313     }
314     if (usecprow) z = zarray + 2*ridx[i];
315     z[0] = sum1; z[1] = sum2;
316     if (!usecprow) z += 2;
317   }
318   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
319   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
320   ierr = PetscLogFlops(8*a->nz - 2*nonzerorow);CHKERRQ(ierr);
321   PetscFunctionReturn(0);
322 }
323 
324 #undef __FUNCT__
325 #define __FUNCT__ "MatMult_SeqBAIJ_3"
326 PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
327 {
328   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
329   PetscScalar       *z = 0,sum1,sum2,sum3,x1,x2,x3,*zarray;
330   const PetscScalar *x,*xb;
331   const MatScalar   *v;
332   PetscErrorCode    ierr;
333   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
334   PetscTruth        usecprow=a->compressedrow.use;
335 
336 
337 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
338 #pragma disjoint(*v,*z,*xb)
339 #endif
340 
341   PetscFunctionBegin;
342   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
343   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
344 
345   idx = a->j;
346   v   = a->a;
347   if (usecprow){
348     mbs  = a->compressedrow.nrows;
349     ii   = a->compressedrow.i;
350     ridx = a->compressedrow.rindex;
351   } else {
352     mbs = a->mbs;
353     ii  = a->i;
354     z   = zarray;
355   }
356 
357   for (i=0; i<mbs; i++) {
358     n  = ii[1] - ii[0]; ii++;
359     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
360     nonzerorow += (n>0);
361     for (j=0; j<n; j++) {
362       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
363       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
364       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
365       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
366       v += 9;
367     }
368     if (usecprow) z = zarray + 3*ridx[i];
369     z[0] = sum1; z[1] = sum2; z[2] = sum3;
370     if (!usecprow) z += 3;
371   }
372   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
373   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
374   ierr = PetscLogFlops(18*a->nz - 3*nonzerorow);CHKERRQ(ierr);
375   PetscFunctionReturn(0);
376 }
377 
378 #undef __FUNCT__
379 #define __FUNCT__ "MatMult_SeqBAIJ_4"
380 PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
381 {
382   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
383   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray;
384   const PetscScalar *x,*xb;
385   const MatScalar   *v;
386   PetscErrorCode    ierr;
387   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
388   PetscTruth        usecprow=a->compressedrow.use;
389 
390   PetscFunctionBegin;
391   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
392   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
393 
394   idx = a->j;
395   v   = a->a;
396   if (usecprow){
397     mbs  = a->compressedrow.nrows;
398     ii   = a->compressedrow.i;
399     ridx = a->compressedrow.rindex;
400   } else {
401     mbs = a->mbs;
402     ii  = a->i;
403     z   = zarray;
404   }
405 
406   for (i=0; i<mbs; i++) {
407     n  = ii[1] - ii[0]; ii++;
408     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
409     nonzerorow += (n>0);
410     for (j=0; j<n; j++) {
411       xb = x + 4*(*idx++);
412       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
413       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
414       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
415       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
416       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
417       v += 16;
418     }
419     if (usecprow) z = zarray + 4*ridx[i];
420     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
421     if (!usecprow) z += 4;
422   }
423   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
424   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
425   ierr = PetscLogFlops(32*a->nz - 4*nonzerorow);CHKERRQ(ierr);
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatMult_SeqBAIJ_5"
431 PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
432 {
433   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
434   PetscScalar       sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*z = 0,*zarray;
435   const PetscScalar *xb,*x;
436   const MatScalar   *v;
437   PetscErrorCode    ierr;
438   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
439   PetscTruth        usecprow=a->compressedrow.use;
440 
441   PetscFunctionBegin;
442   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
443   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
444 
445   idx = a->j;
446   v   = a->a;
447   if (usecprow){
448     mbs  = a->compressedrow.nrows;
449     ii   = a->compressedrow.i;
450     ridx = a->compressedrow.rindex;
451   } else {
452     mbs = a->mbs;
453     ii  = a->i;
454     z   = zarray;
455   }
456 
457   for (i=0; i<mbs; i++) {
458     n  = ii[1] - ii[0]; ii++;
459     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
460     nonzerorow += (n>0);
461     for (j=0; j<n; j++) {
462       xb = x + 5*(*idx++);
463       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
464       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
465       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
466       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
467       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
468       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
469       v += 25;
470     }
471     if (usecprow) z = zarray + 5*ridx[i];
472     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
473     if (!usecprow) z += 5;
474   }
475   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
476   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
477   ierr = PetscLogFlops(50*a->nz - 5*nonzerorow);CHKERRQ(ierr);
478   PetscFunctionReturn(0);
479 }
480 
481 
482 #undef __FUNCT__
483 #define __FUNCT__ "MatMult_SeqBAIJ_6"
484 PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz)
485 {
486   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
487   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,sum5,sum6;
488   const PetscScalar *x,*xb;
489   PetscScalar       x1,x2,x3,x4,x5,x6,*zarray;
490   const MatScalar   *v;
491   PetscErrorCode    ierr;
492   PetscInt          mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
493   PetscTruth        usecprow=a->compressedrow.use;
494 
495   PetscFunctionBegin;
496   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
497   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
498 
499   idx = a->j;
500   v   = a->a;
501   if (usecprow){
502     mbs  = a->compressedrow.nrows;
503     ii   = a->compressedrow.i;
504     ridx = a->compressedrow.rindex;
505   } else {
506     mbs = a->mbs;
507     ii  = a->i;
508     z   = zarray;
509   }
510 
511   for (i=0; i<mbs; i++) {
512     n  = ii[1] - ii[0]; ii++;
513     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0;
514     nonzerorow += (n>0);
515     for (j=0; j<n; j++) {
516       xb = x + 6*(*idx++);
517       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
518       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
519       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
520       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
521       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
522       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
523       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
524       v += 36;
525     }
526     if (usecprow) z = zarray + 6*ridx[i];
527     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
528     if (!usecprow) z += 6;
529   }
530 
531   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
532   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
533   ierr = PetscLogFlops(72*a->nz - 6*nonzerorow);CHKERRQ(ierr);
534   PetscFunctionReturn(0);
535 }
536 #undef __FUNCT__
537 #define __FUNCT__ "MatMult_SeqBAIJ_7"
538 PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
539 {
540   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
541   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
542   const PetscScalar *x,*xb;
543   PetscScalar       x1,x2,x3,x4,x5,x6,x7,*zarray;
544   const MatScalar   *v;
545   PetscErrorCode    ierr;
546   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
547   PetscTruth        usecprow=a->compressedrow.use;
548 
549   PetscFunctionBegin;
550   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
551   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
552 
553   idx = a->j;
554   v   = a->a;
555   if (usecprow){
556     mbs    = a->compressedrow.nrows;
557     ii     = a->compressedrow.i;
558     ridx = a->compressedrow.rindex;
559   } else {
560     mbs = a->mbs;
561     ii  = a->i;
562     z   = zarray;
563   }
564 
565   for (i=0; i<mbs; i++) {
566     n  = ii[1] - ii[0]; ii++;
567     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
568     nonzerorow += (n>0);
569     for (j=0; j<n; j++) {
570       xb = x + 7*(*idx++);
571       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
572       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
573       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
574       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
575       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
576       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
577       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
578       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
579       v += 49;
580     }
581     if (usecprow) z = zarray + 7*ridx[i];
582     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
583     if (!usecprow) z += 7;
584   }
585 
586   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
587   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
588   ierr = PetscLogFlops(98*a->nz - 7*nonzerorow);CHKERRQ(ierr);
589   PetscFunctionReturn(0);
590 }
591 
592 /*
593     This will not work with MatScalar == float because it calls the BLAS
594 */
595 #undef __FUNCT__
596 #define __FUNCT__ "MatMult_SeqBAIJ_N"
597 PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
598 {
599   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
600   PetscScalar    *x,*z = 0,*xb,*work,*workt,*zarray;
601   MatScalar      *v;
602   PetscErrorCode ierr;
603   PetscInt       mbs=a->mbs,i,*idx,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2;
604   PetscInt       ncols,k,*ridx=PETSC_NULL,nonzerorow=0;
605   PetscTruth     usecprow=a->compressedrow.use;
606 
607   PetscFunctionBegin;
608   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
609   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
610 
611   idx = a->j;
612   v   = a->a;
613   if (usecprow){
614     mbs  = a->compressedrow.nrows;
615     ii   = a->compressedrow.i;
616     ridx = a->compressedrow.rindex;
617   } else {
618     mbs = a->mbs;
619     ii  = a->i;
620     z   = zarray;
621   }
622 
623   if (!a->mult_work) {
624     k    = PetscMax(A->rmap.n,A->cmap.n);
625     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
626   }
627   work = a->mult_work;
628   for (i=0; i<mbs; i++) {
629     n     = ii[1] - ii[0]; ii++;
630     ncols = n*bs;
631     workt = work;
632     nonzerorow += (n>0);
633     for (j=0; j<n; j++) {
634       xb = x + bs*(*idx++);
635       for (k=0; k<bs; k++) workt[k] = xb[k];
636       workt += bs;
637     }
638     if (usecprow) z = zarray + bs*ridx[i];
639     Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z);
640     /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */
641     v += n*bs2;
642     if (!usecprow) z += bs;
643   }
644   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
645   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
646   ierr = PetscLogFlops(2*a->nz*bs2 - bs*nonzerorow);CHKERRQ(ierr);
647   PetscFunctionReturn(0);
648 }
649 
650 extern PetscErrorCode VecCopy_Seq(Vec,Vec);
651 #undef __FUNCT__
652 #define __FUNCT__ "MatMultAdd_SeqBAIJ_1"
653 PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
654 {
655   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
656   PetscScalar    *x,*y = 0,*z = 0,sum,*yarray,*zarray;
657   MatScalar      *v;
658   PetscErrorCode ierr;
659   PetscInt       mbs=a->mbs,i,*idx,*ii,n,*ridx=PETSC_NULL;
660   PetscTruth     usecprow=a->compressedrow.use;
661 
662   PetscFunctionBegin;
663   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
664   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
665   if (zz != yy) {
666     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
667   } else {
668     zarray = yarray;
669   }
670 
671   idx = a->j;
672   v   = a->a;
673   if (usecprow){
674     if (zz != yy){
675       ierr = PetscMemcpy(zarray,yarray,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
676     }
677     mbs  = a->compressedrow.nrows;
678     ii   = a->compressedrow.i;
679     ridx = a->compressedrow.rindex;
680   } else {
681     ii  = a->i;
682     y   = yarray;
683     z   = zarray;
684   }
685 
686   for (i=0; i<mbs; i++) {
687     n    = ii[1] - ii[0]; ii++;
688     if (usecprow){
689       z = zarray + ridx[i];
690       y = yarray + ridx[i];
691     }
692     sum = y[0];
693     while (n--) sum += *v++ * x[*idx++];
694     z[0] = sum;
695     if (!usecprow){
696       z++; y++;
697     }
698   }
699   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
700   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
701   if (zz != yy) {
702     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
703   }
704   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatMultAdd_SeqBAIJ_2"
710 PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
711 {
712   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
713   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2;
714   PetscScalar    x1,x2,*yarray,*zarray;
715   MatScalar      *v;
716   PetscErrorCode ierr;
717   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
718   PetscTruth     usecprow=a->compressedrow.use;
719 
720   PetscFunctionBegin;
721   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
722   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
723   if (zz != yy) {
724     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
725   } else {
726     zarray = yarray;
727   }
728 
729   idx = a->j;
730   v   = a->a;
731   if (usecprow){
732     if (zz != yy){
733       ierr = PetscMemcpy(zarray,yarray,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
734     }
735     mbs  = a->compressedrow.nrows;
736     ii   = a->compressedrow.i;
737     ridx = a->compressedrow.rindex;
738     if (zz != yy){
739       ierr = PetscMemcpy(zarray,yarray,a->mbs*sizeof(PetscScalar));CHKERRQ(ierr);
740     }
741   } else {
742     ii  = a->i;
743     y   = yarray;
744     z   = zarray;
745   }
746 
747   for (i=0; i<mbs; i++) {
748     n  = ii[1] - ii[0]; ii++;
749     if (usecprow){
750       z = zarray + 2*ridx[i];
751       y = yarray + 2*ridx[i];
752     }
753     sum1 = y[0]; sum2 = y[1];
754     for (j=0; j<n; j++) {
755       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
756       sum1 += v[0]*x1 + v[2]*x2;
757       sum2 += v[1]*x1 + v[3]*x2;
758       v += 4;
759     }
760     z[0] = sum1; z[1] = sum2;
761     if (!usecprow){
762       z += 2; y += 2;
763     }
764   }
765   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
766   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
767   if (zz != yy) {
768     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
769   }
770   ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr);
771   PetscFunctionReturn(0);
772 }
773 
774 #undef __FUNCT__
775 #define __FUNCT__ "MatMultAdd_SeqBAIJ_3"
776 PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
777 {
778   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
779   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*yarray,*zarray;
780   MatScalar      *v;
781   PetscErrorCode ierr;
782   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
783   PetscTruth     usecprow=a->compressedrow.use;
784 
785   PetscFunctionBegin;
786   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
787   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
788   if (zz != yy) {
789     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
790   } else {
791     zarray = yarray;
792   }
793 
794   idx = a->j;
795   v   = a->a;
796   if (usecprow){
797     if (zz != yy){
798       ierr = PetscMemcpy(zarray,yarray,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
799     }
800     mbs  = a->compressedrow.nrows;
801     ii   = a->compressedrow.i;
802     ridx = a->compressedrow.rindex;
803   } else {
804     ii  = a->i;
805     y   = yarray;
806     z   = zarray;
807   }
808 
809   for (i=0; i<mbs; i++) {
810     n  = ii[1] - ii[0]; ii++;
811     if (usecprow){
812       z = zarray + 3*ridx[i];
813       y = yarray + 3*ridx[i];
814     }
815     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
816     for (j=0; j<n; j++) {
817       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
818       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
819       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
820       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
821       v += 9;
822     }
823     z[0] = sum1; z[1] = sum2; z[2] = sum3;
824     if (!usecprow){
825       z += 3; y += 3;
826     }
827   }
828   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
829   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
830   if (zz != yy) {
831     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
832   }
833   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
834   PetscFunctionReturn(0);
835 }
836 
837 #undef __FUNCT__
838 #define __FUNCT__ "MatMultAdd_SeqBAIJ_4"
839 PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
840 {
841   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
842   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray;
843   MatScalar      *v;
844   PetscErrorCode ierr;
845   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
846   PetscTruth     usecprow=a->compressedrow.use;
847 
848   PetscFunctionBegin;
849   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
850   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
851   if (zz != yy) {
852     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
853   } else {
854     zarray = yarray;
855   }
856 
857   idx   = a->j;
858   v     = a->a;
859   if (usecprow){
860     if (zz != yy){
861       ierr = PetscMemcpy(zarray,yarray,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
862     }
863     mbs  = a->compressedrow.nrows;
864     ii   = a->compressedrow.i;
865     ridx = a->compressedrow.rindex;
866   } else {
867     ii  = a->i;
868     y   = yarray;
869     z   = zarray;
870   }
871 
872   for (i=0; i<mbs; i++) {
873     n  = ii[1] - ii[0]; ii++;
874     if (usecprow){
875       z = zarray + 4*ridx[i];
876       y = yarray + 4*ridx[i];
877     }
878     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
879     for (j=0; j<n; j++) {
880       xb = x + 4*(*idx++);
881       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
882       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
883       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
884       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
885       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
886       v += 16;
887     }
888     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
889     if (!usecprow){
890       z += 4; y += 4;
891     }
892   }
893   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
894   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
895   if (zz != yy) {
896     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
897   }
898   ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr);
899   PetscFunctionReturn(0);
900 }
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "MatMultAdd_SeqBAIJ_5"
904 PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
905 {
906   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
907   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
908   PetscScalar    *yarray,*zarray;
909   MatScalar      *v;
910   PetscErrorCode ierr;
911   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
912   PetscTruth     usecprow=a->compressedrow.use;
913 
914   PetscFunctionBegin;
915   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
916   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
917   if (zz != yy) {
918     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
919   } else {
920     zarray = yarray;
921   }
922 
923   idx = a->j;
924   v   = a->a;
925   if (usecprow){
926     if (zz != yy){
927       ierr = PetscMemcpy(zarray,yarray,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
928     }
929     mbs  = a->compressedrow.nrows;
930     ii   = a->compressedrow.i;
931     ridx = a->compressedrow.rindex;
932   } else {
933     ii  = a->i;
934     y   = yarray;
935     z   = zarray;
936   }
937 
938   for (i=0; i<mbs; i++) {
939     n  = ii[1] - ii[0]; ii++;
940     if (usecprow){
941       z = zarray + 5*ridx[i];
942       y = yarray + 5*ridx[i];
943     }
944     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
945     for (j=0; j<n; j++) {
946       xb = x + 5*(*idx++);
947       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
948       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
949       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
950       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
951       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
952       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
953       v += 25;
954     }
955     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
956     if (!usecprow){
957       z += 5; y += 5;
958     }
959   }
960   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
961   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
962   if (zz != yy) {
963     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
964   }
965   ierr = PetscLogFlops(50*a->nz);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 #undef __FUNCT__
969 #define __FUNCT__ "MatMultAdd_SeqBAIJ_6"
970 PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
971 {
972   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
973   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
974   PetscScalar    x1,x2,x3,x4,x5,x6,*yarray,*zarray;
975   MatScalar      *v;
976   PetscErrorCode ierr;
977   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
978   PetscTruth     usecprow=a->compressedrow.use;
979 
980   PetscFunctionBegin;
981   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
982   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
983   if (zz != yy) {
984     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
985   } else {
986     zarray = yarray;
987   }
988 
989   idx = a->j;
990   v   = a->a;
991   if (usecprow){
992     if (zz != yy){
993       ierr = PetscMemcpy(zarray,yarray,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
994     }
995     mbs  = a->compressedrow.nrows;
996     ii   = a->compressedrow.i;
997     ridx = a->compressedrow.rindex;
998   } else {
999     ii  = a->i;
1000     y   = yarray;
1001     z   = zarray;
1002   }
1003 
1004   for (i=0; i<mbs; i++) {
1005     n  = ii[1] - ii[0]; ii++;
1006     if (usecprow){
1007       z = zarray + 6*ridx[i];
1008       y = yarray + 6*ridx[i];
1009     }
1010     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5];
1011     for (j=0; j<n; j++) {
1012       xb = x + 6*(*idx++);
1013       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
1014       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
1015       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
1016       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
1017       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
1018       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
1019       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
1020       v += 36;
1021     }
1022     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
1023     if (!usecprow){
1024       z += 6; y += 6;
1025     }
1026   }
1027   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1028   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
1029   if (zz != yy) {
1030     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1031   }
1032   ierr = PetscLogFlops(72*a->nz);CHKERRQ(ierr);
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 #undef __FUNCT__
1037 #define __FUNCT__ "MatMultAdd_SeqBAIJ_7"
1038 PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1039 {
1040   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1041   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1042   PetscScalar    x1,x2,x3,x4,x5,x6,x7,*yarray,*zarray;
1043   MatScalar      *v;
1044   PetscErrorCode ierr;
1045   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
1046   PetscTruth     usecprow=a->compressedrow.use;
1047 
1048   PetscFunctionBegin;
1049   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1050   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
1051   if (zz != yy) {
1052     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
1053   } else {
1054     zarray = yarray;
1055   }
1056 
1057   idx = a->j;
1058   v   = a->a;
1059   if (usecprow){
1060     if (zz != yy){
1061       ierr = PetscMemcpy(zarray,yarray,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1062     }
1063     mbs  = a->compressedrow.nrows;
1064     ii   = a->compressedrow.i;
1065     ridx = a->compressedrow.rindex;
1066   } else {
1067     ii  = a->i;
1068     y   = yarray;
1069     z   = zarray;
1070   }
1071 
1072   for (i=0; i<mbs; i++) {
1073     n  = ii[1] - ii[0]; ii++;
1074     if (usecprow){
1075       z = zarray + 7*ridx[i];
1076       y = yarray + 7*ridx[i];
1077     }
1078     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
1079     for (j=0; j<n; j++) {
1080       xb = x + 7*(*idx++);
1081       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1082       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1083       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1084       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1085       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1086       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1087       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1088       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1089       v += 49;
1090     }
1091     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1092     if (!usecprow){
1093       z += 7; y += 7;
1094     }
1095   }
1096   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1097   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
1098   if (zz != yy) {
1099     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1100   }
1101   ierr = PetscLogFlops(98*a->nz);CHKERRQ(ierr);
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatMultAdd_SeqBAIJ_N"
1107 PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1108 {
1109   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1110   PetscScalar    *x,*z = 0,*xb,*work,*workt,*zarray;
1111   MatScalar      *v;
1112   PetscErrorCode ierr;
1113   PetscInt       mbs,i,*idx,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2;
1114   PetscInt       ncols,k,*ridx=PETSC_NULL;
1115   PetscTruth     usecprow=a->compressedrow.use;
1116 
1117   PetscFunctionBegin;
1118   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
1119   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1120   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
1121 
1122   idx = a->j;
1123   v   = a->a;
1124   if (usecprow){
1125     mbs    = a->compressedrow.nrows;
1126     ii     = a->compressedrow.i;
1127     ridx = a->compressedrow.rindex;
1128   } else {
1129     mbs = a->mbs;
1130     ii  = a->i;
1131     z   = zarray;
1132   }
1133 
1134   if (!a->mult_work) {
1135     k    = PetscMax(A->rmap.n,A->cmap.n);
1136     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1137   }
1138   work = a->mult_work;
1139   for (i=0; i<mbs; i++) {
1140     n     = ii[1] - ii[0]; ii++;
1141     ncols = n*bs;
1142     workt = work;
1143     for (j=0; j<n; j++) {
1144       xb = x + bs*(*idx++);
1145       for (k=0; k<bs; k++) workt[k] = xb[k];
1146       workt += bs;
1147     }
1148     if (usecprow) z = zarray + bs*ridx[i];
1149     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
1150     /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */
1151     v += n*bs2;
1152     if (!usecprow){
1153       z += bs;
1154     }
1155   }
1156   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1157   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1158   ierr = PetscLogFlops(2*a->nz*bs2);CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 #undef __FUNCT__
1163 #define __FUNCT__ "MatMultTranspose_SeqBAIJ"
1164 PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz)
1165 {
1166   PetscScalar    zero = 0.0;
1167   PetscErrorCode ierr;
1168 
1169   PetscFunctionBegin;
1170   ierr = VecSet(zz,zero);CHKERRQ(ierr);
1171   ierr = MatMultTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr);
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 #undef __FUNCT__
1176 #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ"
1177 PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1178 
1179 {
1180   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1181   PetscScalar       *zb,*x,*z,*xb = 0,x1,x2,x3,x4,x5;
1182   MatScalar         *v;
1183   PetscErrorCode    ierr;
1184   PetscInt          mbs,i,*idx,*ii,rval,bs=A->rmap.bs,j,n,bs2=a->bs2,*ib,*ridx=PETSC_NULL;
1185   Mat_CompressedRow cprow = a->compressedrow;
1186   PetscTruth        usecprow=cprow.use;
1187 
1188   PetscFunctionBegin;
1189   if (yy != zz) { ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); }
1190   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1191   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
1192 
1193   idx = a->j;
1194   v   = a->a;
1195   if (usecprow){
1196     mbs  = cprow.nrows;
1197     ii   = cprow.i;
1198     ridx = cprow.rindex;
1199   } else {
1200     mbs=a->mbs;
1201     ii = a->i;
1202     xb = x;
1203   }
1204 
1205   switch (bs) {
1206   case 1:
1207     for (i=0; i<mbs; i++) {
1208       if (usecprow) xb = x + ridx[i];
1209       x1 = xb[0];
1210       ib = idx + ii[0];
1211       n  = ii[1] - ii[0]; ii++;
1212       for (j=0; j<n; j++) {
1213         rval    = ib[j];
1214         z[rval] += *v * x1;
1215         v++;
1216       }
1217       if (!usecprow) xb++;
1218     }
1219     break;
1220   case 2:
1221     for (i=0; i<mbs; i++) {
1222       if (usecprow) xb = x + 2*ridx[i];
1223       x1 = xb[0]; x2 = xb[1];
1224       ib = idx + ii[0];
1225       n  = ii[1] - ii[0]; ii++;
1226       for (j=0; j<n; j++) {
1227         rval      = ib[j]*2;
1228         z[rval++] += v[0]*x1 + v[1]*x2;
1229         z[rval++] += v[2]*x1 + v[3]*x2;
1230         v  += 4;
1231       }
1232       if (!usecprow) xb += 2;
1233     }
1234     break;
1235   case 3:
1236     for (i=0; i<mbs; i++) {
1237       if (usecprow) xb = x + 3*ridx[i];
1238       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1239       ib = idx + ii[0];
1240       n  = ii[1] - ii[0]; ii++;
1241       for (j=0; j<n; j++) {
1242         rval      = ib[j]*3;
1243         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1244         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1245         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1246         v  += 9;
1247       }
1248       if (!usecprow) xb += 3;
1249     }
1250     break;
1251   case 4:
1252     for (i=0; i<mbs; i++) {
1253       if (usecprow) xb = x + 4*ridx[i];
1254       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1255       ib = idx + ii[0];
1256       n  = ii[1] - ii[0]; ii++;
1257       for (j=0; j<n; j++) {
1258         rval      = ib[j]*4;
1259         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1260         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1261         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1262         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1263         v  += 16;
1264       }
1265       if (!usecprow) xb += 4;
1266     }
1267     break;
1268   case 5:
1269     for (i=0; i<mbs; i++) {
1270       if (usecprow) xb = x + 5*ridx[i];
1271       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1272       x4 = xb[3]; x5 = xb[4];
1273       ib = idx + ii[0];
1274       n  = ii[1] - ii[0]; ii++;
1275       for (j=0; j<n; j++) {
1276         rval      = ib[j]*5;
1277         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1278         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1279         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1280         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1281         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1282         v  += 25;
1283       }
1284       if (!usecprow) xb += 5;
1285     }
1286     break;
1287   default: {      /* block sizes larger then 5 by 5 are handled by BLAS */
1288       PetscInt     ncols,k;
1289       PetscScalar  *work,*workt,*xtmp;
1290 
1291       if (!a->mult_work) {
1292         k = PetscMax(A->rmap.n,A->cmap.n);
1293         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1294       }
1295       work = a->mult_work;
1296       xtmp = x;
1297       for (i=0; i<mbs; i++) {
1298         n     = ii[1] - ii[0]; ii++;
1299         ncols = n*bs;
1300         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1301         if (usecprow) {
1302           xtmp = x + bs*ridx[i];
1303         }
1304         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work);
1305         /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */
1306         v += n*bs2;
1307         if (!usecprow) xtmp += bs;
1308         workt = work;
1309         for (j=0; j<n; j++) {
1310           zb = z + bs*(*idx++);
1311           for (k=0; k<bs; k++) zb[k] += workt[k] ;
1312           workt += bs;
1313         }
1314       }
1315     }
1316   }
1317   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1318   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
1319   ierr = PetscLogFlops(2*a->nz*a->bs2);CHKERRQ(ierr);
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatScale_SeqBAIJ"
1325 PetscErrorCode MatScale_SeqBAIJ(Mat inA,PetscScalar alpha)
1326 {
1327   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1328   PetscInt       totalnz = a->bs2*a->nz;
1329   PetscScalar    oalpha = alpha;
1330   PetscErrorCode ierr;
1331   PetscBLASInt   one = 1,tnz = PetscBLASIntCast(totalnz);
1332 
1333   PetscFunctionBegin;
1334   BLASscal_(&tnz,&oalpha,a->a,&one);
1335   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatNorm_SeqBAIJ"
1341 PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm)
1342 {
1343   PetscErrorCode ierr;
1344   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1345   MatScalar      *v = a->a;
1346   PetscReal      sum = 0.0;
1347   PetscInt       i,j,k,bs=A->rmap.bs,nz=a->nz,bs2=a->bs2,k1;
1348 
1349   PetscFunctionBegin;
1350   if (type == NORM_FROBENIUS) {
1351     for (i=0; i< bs2*nz; i++) {
1352 #if defined(PETSC_USE_COMPLEX)
1353       sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1354 #else
1355       sum += (*v)*(*v); v++;
1356 #endif
1357     }
1358     *norm = sqrt(sum);
1359   } else if (type == NORM_1) { /* maximum column sum */
1360     PetscReal *tmp;
1361     PetscInt  *bcol = a->j;
1362     ierr = PetscMalloc((A->cmap.n+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1363     ierr = PetscMemzero(tmp,A->cmap.n*sizeof(PetscReal));CHKERRQ(ierr);
1364     for (i=0; i<nz; i++){
1365       for (j=0; j<bs; j++){
1366         k1 = bs*(*bcol) + j; /* column index */
1367         for (k=0; k<bs; k++){
1368           tmp[k1] += PetscAbsScalar(*v); v++;
1369         }
1370       }
1371       bcol++;
1372     }
1373     *norm = 0.0;
1374     for (j=0; j<A->cmap.n; j++) {
1375       if (tmp[j] > *norm) *norm = tmp[j];
1376     }
1377     ierr = PetscFree(tmp);CHKERRQ(ierr);
1378   } else if (type == NORM_INFINITY) { /* maximum row sum */
1379     *norm = 0.0;
1380     for (k=0; k<bs; k++) {
1381       for (j=0; j<a->mbs; j++) {
1382         v = a->a + bs2*a->i[j] + k;
1383         sum = 0.0;
1384         for (i=0; i<a->i[j+1]-a->i[j]; i++) {
1385           for (k1=0; k1<bs; k1++){
1386             sum += PetscAbsScalar(*v);
1387             v   += bs;
1388           }
1389         }
1390         if (sum > *norm) *norm = sum;
1391       }
1392     }
1393   } else {
1394     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1395   }
1396   PetscFunctionReturn(0);
1397 }
1398 
1399 
1400 #undef __FUNCT__
1401 #define __FUNCT__ "MatEqual_SeqBAIJ"
1402 PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg)
1403 {
1404   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data;
1405   PetscErrorCode ierr;
1406 
1407   PetscFunctionBegin;
1408   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1409   if ((A->rmap.N != B->rmap.N) || (A->cmap.n != B->cmap.n) || (A->rmap.bs != B->rmap.bs)|| (a->nz != b->nz)) {
1410     *flg = PETSC_FALSE;
1411     PetscFunctionReturn(0);
1412   }
1413 
1414   /* if the a->i are the same */
1415   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1416   if (!*flg) {
1417     PetscFunctionReturn(0);
1418   }
1419 
1420   /* if a->j are the same */
1421   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1422   if (!*flg) {
1423     PetscFunctionReturn(0);
1424   }
1425   /* if a->a are the same */
1426   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap.bs)*(B->rmap.bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1427   PetscFunctionReturn(0);
1428 
1429 }
1430 
1431 #undef __FUNCT__
1432 #define __FUNCT__ "MatGetDiagonal_SeqBAIJ"
1433 PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1434 {
1435   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1436   PetscErrorCode ierr;
1437   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1438   PetscScalar    *x,zero = 0.0;
1439   MatScalar      *aa,*aa_j;
1440 
1441   PetscFunctionBegin;
1442   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1443   bs   = A->rmap.bs;
1444   aa   = a->a;
1445   ai   = a->i;
1446   aj   = a->j;
1447   ambs = a->mbs;
1448   bs2  = a->bs2;
1449 
1450   ierr = VecSet(v,zero);CHKERRQ(ierr);
1451   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1452   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1453   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1454   for (i=0; i<ambs; i++) {
1455     for (j=ai[i]; j<ai[i+1]; j++) {
1456       if (aj[j] == i) {
1457         row  = i*bs;
1458         aa_j = aa+j*bs2;
1459         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1460         break;
1461       }
1462     }
1463   }
1464   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatDiagonalScale_SeqBAIJ"
1470 PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1471 {
1472   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1473   PetscScalar    *l,*r,x,*li,*ri;
1474   MatScalar      *aa,*v;
1475   PetscErrorCode ierr;
1476   PetscInt       i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1477 
1478   PetscFunctionBegin;
1479   ai  = a->i;
1480   aj  = a->j;
1481   aa  = a->a;
1482   m   = A->rmap.n;
1483   n   = A->cmap.n;
1484   bs  = A->rmap.bs;
1485   mbs = a->mbs;
1486   bs2 = a->bs2;
1487   if (ll) {
1488     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
1489     ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1490     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1491     for (i=0; i<mbs; i++) { /* for each block row */
1492       M  = ai[i+1] - ai[i];
1493       li = l + i*bs;
1494       v  = aa + bs2*ai[i];
1495       for (j=0; j<M; j++) { /* for each block */
1496         for (k=0; k<bs2; k++) {
1497           (*v++) *= li[k%bs];
1498         }
1499       }
1500     }
1501     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1502     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1503   }
1504 
1505   if (rr) {
1506     ierr = VecGetArray(rr,&r);CHKERRQ(ierr);
1507     ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr);
1508     if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
1509     for (i=0; i<mbs; i++) { /* for each block row */
1510       M  = ai[i+1] - ai[i];
1511       v  = aa + bs2*ai[i];
1512       for (j=0; j<M; j++) { /* for each block */
1513         ri = r + bs*aj[ai[i]+j];
1514         for (k=0; k<bs; k++) {
1515           x = ri[k];
1516           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
1517         }
1518       }
1519     }
1520     ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr);
1521     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1522   }
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 
1527 #undef __FUNCT__
1528 #define __FUNCT__ "MatGetInfo_SeqBAIJ"
1529 PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1530 {
1531   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1532 
1533   PetscFunctionBegin;
1534   info->rows_global    = (double)A->rmap.N;
1535   info->columns_global = (double)A->cmap.n;
1536   info->rows_local     = (double)A->rmap.n;
1537   info->columns_local  = (double)A->cmap.n;
1538   info->block_size     = a->bs2;
1539   info->nz_allocated   = a->maxnz;
1540   info->nz_used        = a->bs2*a->nz;
1541   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1542   info->assemblies   = A->num_ass;
1543   info->mallocs      = a->reallocs;
1544   info->memory       = ((PetscObject)A)->mem;
1545   if (A->factor) {
1546     info->fill_ratio_given  = A->info.fill_ratio_given;
1547     info->fill_ratio_needed = A->info.fill_ratio_needed;
1548     info->factor_mallocs    = A->info.factor_mallocs;
1549   } else {
1550     info->fill_ratio_given  = 0;
1551     info->fill_ratio_needed = 0;
1552     info->factor_mallocs    = 0;
1553   }
1554   PetscFunctionReturn(0);
1555 }
1556 
1557 
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatZeroEntries_SeqBAIJ"
1560 PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A)
1561 {
1562   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1563   PetscErrorCode ierr;
1564 
1565   PetscFunctionBegin;
1566   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
1567   PetscFunctionReturn(0);
1568 }
1569