xref: /petsc/src/mat/impls/baij/seq/baij2.c (revision c8cd8b03a8e69285ecfea39db41c0ff684948fa5)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/seq/baij.h"
4 #include "src/inline/spops.h"
5 #include "src/inline/ilu.h"
6 #include "petscbt.h"
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ"
10 PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
11 {
12   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
13   PetscErrorCode ierr;
14   PetscInt       row,i,j,k,l,m,n,*idx,*nidx,isz,val,ival;
15   PetscInt       start,end,*ai,*aj,bs,*nidx2;
16   PetscBT        table;
17 
18   PetscFunctionBegin;
19   m     = a->mbs;
20   ai    = a->i;
21   aj    = a->j;
22   bs    = A->rmap.bs;
23 
24   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
25 
26   ierr = PetscBTCreate(m,table);CHKERRQ(ierr);
27   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr);
28   ierr = PetscMalloc((A->rmap.N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr);
29 
30   for (i=0; i<is_max; i++) {
31     /* Initialise the two local arrays */
32     isz  = 0;
33     ierr = PetscBTMemzero(m,table);CHKERRQ(ierr);
34 
35     /* Extract the indices, assume there can be duplicate entries */
36     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
37     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
38 
39     /* Enter these into the temp arrays i.e mark table[row], enter row into new index */
40     for (j=0; j<n ; ++j){
41       ival = idx[j]/bs; /* convert the indices into block indices */
42       if (ival>=m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
43       if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;}
44     }
45     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
46     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
47 
48     k = 0;
49     for (j=0; j<ov; j++){ /* for each overlap*/
50       n = isz;
51       for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */
52         row   = nidx[k];
53         start = ai[row];
54         end   = ai[row+1];
55         for (l = start; l<end ; l++){
56           val = aj[l];
57           if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;}
58         }
59       }
60     }
61     /* expand the Index Set */
62     for (j=0; j<isz; j++) {
63       for (k=0; k<bs; k++)
64         nidx2[j*bs+k] = nidx[j]*bs+k;
65     }
66     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
67   }
68   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
69   ierr = PetscFree(nidx);CHKERRQ(ierr);
70   ierr = PetscFree(nidx2);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private"
76 PetscErrorCode MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
77 {
78   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*c;
79   PetscErrorCode ierr;
80   PetscInt       *smap,i,k,kstart,kend,oldcols = a->nbs,*lens;
81   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
82   PetscInt       *irow,*icol,nrows,ncols,*ssmap,bs=A->rmap.bs,bs2=a->bs2;
83   PetscInt       *aj = a->j,*ai = a->i;
84   MatScalar      *mat_a;
85   Mat            C;
86   PetscTruth     flag;
87 
88   PetscFunctionBegin;
89   ierr = ISSorted(iscol,(PetscTruth*)&i);CHKERRQ(ierr);
90   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
91 
92   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
93   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
94   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
95   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
96 
97   ierr = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr);
98   ssmap = smap;
99   ierr = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
100   ierr  = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr);
101   for (i=0; i<ncols; i++) smap[icol[i]] = i+1;
102   /* determine lens of each row */
103   for (i=0; i<nrows; i++) {
104     kstart  = ai[irow[i]];
105     kend    = kstart + a->ilen[irow[i]];
106     lens[i] = 0;
107       for (k=kstart; k<kend; k++) {
108         if (ssmap[aj[k]]) {
109           lens[i]++;
110         }
111       }
112     }
113   /* Create and fill new matrix */
114   if (scall == MAT_REUSE_MATRIX) {
115     c = (Mat_SeqBAIJ *)((*B)->data);
116 
117     if (c->mbs!=nrows || c->nbs!=ncols || (*B)->rmap.bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
118     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr);
119     if (!flag) {
120       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
121     }
122     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr);
123     C = *B;
124   } else {
125     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
126     ierr = MatSetSizes(C,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
127     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
128     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,lens);CHKERRQ(ierr);
129   }
130   c = (Mat_SeqBAIJ *)(C->data);
131   for (i=0; i<nrows; i++) {
132     row    = irow[i];
133     kstart = ai[row];
134     kend   = kstart + a->ilen[row];
135     mat_i  = c->i[i];
136     mat_j  = c->j + mat_i;
137     mat_a  = c->a + mat_i*bs2;
138     mat_ilen = c->ilen + i;
139     for (k=kstart; k<kend; k++) {
140       if ((tcol=ssmap[a->j[k]])) {
141         *mat_j++ = tcol - 1;
142         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
143         mat_a   += bs2;
144         (*mat_ilen)++;
145       }
146     }
147   }
148 
149   /* Free work space */
150   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
151   ierr = PetscFree(smap);CHKERRQ(ierr);
152   ierr = PetscFree(lens);CHKERRQ(ierr);
153   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
154   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
155 
156   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
157   *B = C;
158   PetscFunctionReturn(0);
159 }
160 
161 #undef __FUNCT__
162 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ"
163 PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
164 {
165   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
166   IS             is1,is2;
167   PetscErrorCode ierr;
168   PetscInt       *vary,*iary,*irow,*icol,nrows,ncols,i,bs=A->rmap.bs,count;
169 
170   PetscFunctionBegin;
171   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
172   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
173   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
174   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
175 
176   /* Verify if the indices corespond to each element in a block
177    and form the IS with compressed IS */
178   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr);
179   iary = vary + a->mbs;
180   ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr);
181   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
182   count = 0;
183   for (i=0; i<a->mbs; i++) {
184     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Index set does not match blocks");
185     if (vary[i]==bs) iary[count++] = i;
186   }
187   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
188 
189   ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr);
190   for (i=0; i<ncols; i++) vary[icol[i]/bs]++;
191   count = 0;
192   for (i=0; i<a->mbs; i++) {
193     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_PLIB,"Internal error in PETSc");
194     if (vary[i]==bs) iary[count++] = i;
195   }
196   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr);
197   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
198   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
199   ierr = PetscFree(vary);CHKERRQ(ierr);
200 
201   ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,cs,scall,B);CHKERRQ(ierr);
202   ISDestroy(is1);
203   ISDestroy(is2);
204   PetscFunctionReturn(0);
205 }
206 
207 #undef __FUNCT__
208 #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ"
209 PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
210 {
211   PetscErrorCode ierr;
212   PetscInt       i;
213 
214   PetscFunctionBegin;
215   if (scall == MAT_INITIAL_MATRIX) {
216     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
217   }
218 
219   for (i=0; i<n; i++) {
220     ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);CHKERRQ(ierr);
221   }
222   PetscFunctionReturn(0);
223 }
224 
225 
226 /* -------------------------------------------------------*/
227 /* Should check that shapes of vectors and matrices match */
228 /* -------------------------------------------------------*/
229 #include "petscblaslapack.h"
230 
231 #undef __FUNCT__
232 #define __FUNCT__ "MatMult_SeqBAIJ_1"
233 PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
234 {
235   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
236   PetscScalar    *x,*z,sum;
237   MatScalar      *v;
238   PetscErrorCode ierr;
239   PetscInt       mbs,i,*idx,*ii,n,*ridx=PETSC_NULL,nonzerorow=0;
240   PetscTruth     usecprow=a->compressedrow.use;
241 
242   PetscFunctionBegin;
243   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
244   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
245 
246   idx = a->j;
247   v   = a->a;
248   if (usecprow){
249     mbs  = a->compressedrow.nrows;
250     ii   = a->compressedrow.i;
251     ridx = a->compressedrow.rindex;
252   } else {
253     mbs = a->mbs;
254     ii  = a->i;
255   }
256 
257   for (i=0; i<mbs; i++) {
258     n    = ii[1] - ii[0]; ii++;
259     sum  = 0.0;
260     nonzerorow += (n>0);
261     while (n--) sum += *v++ * x[*idx++];
262     if (usecprow){
263       z[ridx[i]] = sum;
264     } else {
265       z[i] = sum;
266     }
267   }
268   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
269   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
270   ierr = PetscLogFlops(2*a->nz - nonzerorow);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatMult_SeqBAIJ_2"
276 PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
277 {
278   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
279   PetscScalar    *x,*z = 0,*xb,sum1,sum2,*zarray;
280   PetscScalar    x1,x2;
281   MatScalar      *v;
282   PetscErrorCode ierr;
283   PetscInt       mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
284   PetscTruth     usecprow=a->compressedrow.use;
285 
286   PetscFunctionBegin;
287   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
288   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
289 
290   idx = a->j;
291   v   = a->a;
292   if (usecprow){
293     mbs  = a->compressedrow.nrows;
294     ii   = a->compressedrow.i;
295     ridx = a->compressedrow.rindex;
296   } else {
297     mbs = a->mbs;
298     ii  = a->i;
299     z   = zarray;
300   }
301 
302   for (i=0; i<mbs; i++) {
303     n  = ii[1] - ii[0]; ii++;
304     sum1 = 0.0; sum2 = 0.0;
305     nonzerorow += (n>0);
306     for (j=0; j<n; j++) {
307       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
308       sum1 += v[0]*x1 + v[2]*x2;
309       sum2 += v[1]*x1 + v[3]*x2;
310       v += 4;
311     }
312     if (usecprow) z = zarray + 2*ridx[i];
313     z[0] = sum1; z[1] = sum2;
314     if (!usecprow) z += 2;
315   }
316   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
317   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
318   ierr = PetscLogFlops(8*a->nz - 2*nonzerorow);CHKERRQ(ierr);
319   PetscFunctionReturn(0);
320 }
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatMult_SeqBAIJ_3"
324 PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
325 {
326   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
327   PetscScalar    *x,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*zarray;
328   MatScalar      *v;
329   PetscErrorCode ierr;
330   PetscInt       mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
331   PetscTruth     usecprow=a->compressedrow.use;
332 
333 
334 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
335 #pragma disjoint(*v,*z,*xb)
336 #endif
337 
338   PetscFunctionBegin;
339   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
340   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
341 
342   idx = a->j;
343   v   = a->a;
344   if (usecprow){
345     mbs  = a->compressedrow.nrows;
346     ii   = a->compressedrow.i;
347     ridx = a->compressedrow.rindex;
348   } else {
349     mbs = a->mbs;
350     ii  = a->i;
351     z   = zarray;
352   }
353 
354   for (i=0; i<mbs; i++) {
355     n  = ii[1] - ii[0]; ii++;
356     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
357     nonzerorow += (n>0);
358     for (j=0; j<n; j++) {
359       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
360       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
361       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
362       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
363       v += 9;
364     }
365     if (usecprow) z = zarray + 3*ridx[i];
366     z[0] = sum1; z[1] = sum2; z[2] = sum3;
367     if (!usecprow) z += 3;
368   }
369   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
370   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
371   ierr = PetscLogFlops(18*a->nz - 3*nonzerorow);CHKERRQ(ierr);
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "MatMult_SeqBAIJ_4"
377 PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
378 {
379   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
380   PetscScalar    *x,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray;
381   MatScalar      *v;
382   PetscErrorCode ierr;
383   PetscInt       mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
384   PetscTruth     usecprow=a->compressedrow.use;
385 
386   PetscFunctionBegin;
387   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
388   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
389 
390   idx = a->j;
391   v   = a->a;
392   if (usecprow){
393     mbs  = a->compressedrow.nrows;
394     ii   = a->compressedrow.i;
395     ridx = a->compressedrow.rindex;
396   } else {
397     mbs = a->mbs;
398     ii  = a->i;
399     z   = zarray;
400   }
401 
402   for (i=0; i<mbs; i++) {
403     n  = ii[1] - ii[0]; ii++;
404     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
405     nonzerorow += (n>0);
406     for (j=0; j<n; j++) {
407       xb = x + 4*(*idx++);
408       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
409       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
410       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
411       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
412       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
413       v += 16;
414     }
415     if (usecprow) z = zarray + 4*ridx[i];
416     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
417     if (!usecprow) z += 4;
418   }
419   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
420   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
421   ierr = PetscLogFlops(32*a->nz - 4*nonzerorow);CHKERRQ(ierr);
422   PetscFunctionReturn(0);
423 }
424 
425 #undef __FUNCT__
426 #define __FUNCT__ "MatMult_SeqBAIJ_5"
427 PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
428 {
429   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
430   PetscScalar    sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*xb,*z = 0,*x,*zarray;
431   MatScalar      *v;
432   PetscErrorCode ierr;
433   PetscInt       mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
434   PetscTruth     usecprow=a->compressedrow.use;
435 
436   PetscFunctionBegin;
437   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
438   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
439 
440   idx = a->j;
441   v   = a->a;
442   if (usecprow){
443     mbs  = a->compressedrow.nrows;
444     ii   = a->compressedrow.i;
445     ridx = a->compressedrow.rindex;
446   } else {
447     mbs = a->mbs;
448     ii  = a->i;
449     z   = zarray;
450   }
451 
452   for (i=0; i<mbs; i++) {
453     n  = ii[1] - ii[0]; ii++;
454     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
455     nonzerorow += (n>0);
456     for (j=0; j<n; j++) {
457       xb = x + 5*(*idx++);
458       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
459       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
460       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
461       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
462       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
463       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
464       v += 25;
465     }
466     if (usecprow) z = zarray + 5*ridx[i];
467     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
468     if (!usecprow) z += 5;
469   }
470   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
471   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
472   ierr = PetscLogFlops(50*a->nz - 5*nonzerorow);CHKERRQ(ierr);
473   PetscFunctionReturn(0);
474 }
475 
476 
477 #undef __FUNCT__
478 #define __FUNCT__ "MatMult_SeqBAIJ_6"
479 PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz)
480 {
481   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
482   PetscScalar    *x,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
483   PetscScalar    x1,x2,x3,x4,x5,x6,*zarray;
484   MatScalar      *v;
485   PetscErrorCode ierr;
486   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
487   PetscTruth     usecprow=a->compressedrow.use;
488 
489   PetscFunctionBegin;
490   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
491   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
492 
493   idx = a->j;
494   v   = a->a;
495   if (usecprow){
496     mbs  = a->compressedrow.nrows;
497     ii   = a->compressedrow.i;
498     ridx = a->compressedrow.rindex;
499   } else {
500     mbs = a->mbs;
501     ii  = a->i;
502     z   = zarray;
503   }
504 
505   for (i=0; i<mbs; i++) {
506     n  = ii[1] - ii[0]; ii++;
507     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0;
508     nonzerorow += (n>0);
509     for (j=0; j<n; j++) {
510       xb = x + 6*(*idx++);
511       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
512       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
513       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
514       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
515       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
516       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
517       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
518       v += 36;
519     }
520     if (usecprow) z = zarray + 6*ridx[i];
521     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
522     if (!usecprow) z += 6;
523   }
524 
525   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
526   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
527   ierr = PetscLogFlops(72*a->nz - 6*nonzerorow);CHKERRQ(ierr);
528   PetscFunctionReturn(0);
529 }
530 #undef __FUNCT__
531 #define __FUNCT__ "MatMult_SeqBAIJ_7"
532 PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
533 {
534   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
535   PetscScalar    *x,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
536   PetscScalar    x1,x2,x3,x4,x5,x6,x7,*zarray;
537   MatScalar      *v;
538   PetscErrorCode ierr;
539   PetscInt       mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
540   PetscTruth     usecprow=a->compressedrow.use;
541 
542   PetscFunctionBegin;
543   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
544   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
545 
546   idx = a->j;
547   v   = a->a;
548   if (usecprow){
549     mbs    = a->compressedrow.nrows;
550     ii     = a->compressedrow.i;
551     ridx = a->compressedrow.rindex;
552   } else {
553     mbs = a->mbs;
554     ii  = a->i;
555     z   = zarray;
556   }
557 
558   for (i=0; i<mbs; i++) {
559     n  = ii[1] - ii[0]; ii++;
560     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
561     nonzerorow += (n>0);
562     for (j=0; j<n; j++) {
563       xb = x + 7*(*idx++);
564       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
565       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
566       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
567       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
568       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
569       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
570       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
571       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
572       v += 49;
573     }
574     if (usecprow) z = zarray + 7*ridx[i];
575     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
576     if (!usecprow) z += 7;
577   }
578 
579   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
580   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
581   ierr = PetscLogFlops(98*a->nz - 7*nonzerorow);CHKERRQ(ierr);
582   PetscFunctionReturn(0);
583 }
584 
585 /*
586     This will not work with MatScalar == float because it calls the BLAS
587 */
588 #undef __FUNCT__
589 #define __FUNCT__ "MatMult_SeqBAIJ_N"
590 PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
591 {
592   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
593   PetscScalar    *x,*z = 0,*xb,*work,*workt,*zarray;
594   MatScalar      *v;
595   PetscErrorCode ierr;
596   PetscInt       mbs=a->mbs,i,*idx,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2;
597   PetscInt       ncols,k,*ridx=PETSC_NULL,nonzerorow=0;
598   PetscTruth     usecprow=a->compressedrow.use;
599 
600   PetscFunctionBegin;
601   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
602   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
603 
604   idx = a->j;
605   v   = a->a;
606   if (usecprow){
607     mbs  = a->compressedrow.nrows;
608     ii   = a->compressedrow.i;
609     ridx = a->compressedrow.rindex;
610   } else {
611     mbs = a->mbs;
612     ii  = a->i;
613     z   = zarray;
614   }
615 
616   if (!a->mult_work) {
617     k    = PetscMax(A->rmap.n,A->cmap.n);
618     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
619   }
620   work = a->mult_work;
621   for (i=0; i<mbs; i++) {
622     n     = ii[1] - ii[0]; ii++;
623     ncols = n*bs;
624     workt = work;
625     nonzerorow += (n>0);
626     for (j=0; j<n; j++) {
627       xb = x + bs*(*idx++);
628       for (k=0; k<bs; k++) workt[k] = xb[k];
629       workt += bs;
630     }
631     if (usecprow) z = zarray + bs*ridx[i];
632     Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z);
633     /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */
634     v += n*bs2;
635     if (!usecprow) z += bs;
636   }
637   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
638   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
639   ierr = PetscLogFlops(2*a->nz*bs2 - bs*nonzerorow);CHKERRQ(ierr);
640   PetscFunctionReturn(0);
641 }
642 
643 extern PetscErrorCode VecCopy_Seq(Vec,Vec);
644 #undef __FUNCT__
645 #define __FUNCT__ "MatMultAdd_SeqBAIJ_1"
646 PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
647 {
648   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
649   PetscScalar    *x,*y = 0,*z = 0,sum,*yarray,*zarray;
650   MatScalar      *v;
651   PetscErrorCode ierr;
652   PetscInt       mbs=a->mbs,i,*idx,*ii,n,*ridx=PETSC_NULL;
653   PetscTruth     usecprow=a->compressedrow.use;
654 
655   PetscFunctionBegin;
656   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
657   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
658   if (zz != yy) {
659     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
660   } else {
661     zarray = yarray;
662   }
663 
664   idx = a->j;
665   v   = a->a;
666   if (usecprow){
667     if (zz != yy){
668       ierr = PetscMemcpy(zarray,yarray,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
669     }
670     mbs  = a->compressedrow.nrows;
671     ii   = a->compressedrow.i;
672     ridx = a->compressedrow.rindex;
673   } else {
674     ii  = a->i;
675     y   = yarray;
676     z   = zarray;
677   }
678 
679   for (i=0; i<mbs; i++) {
680     n    = ii[1] - ii[0]; ii++;
681     if (usecprow){
682       z = zarray + ridx[i];
683       y = yarray + ridx[i];
684     }
685     sum = y[0];
686     while (n--) sum += *v++ * x[*idx++];
687     z[0] = sum;
688     if (!usecprow){
689       z++; y++;
690     }
691   }
692   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
693   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
694   if (zz != yy) {
695     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
696   }
697   ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr);
698   PetscFunctionReturn(0);
699 }
700 
701 #undef __FUNCT__
702 #define __FUNCT__ "MatMultAdd_SeqBAIJ_2"
703 PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
704 {
705   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
706   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2;
707   PetscScalar    x1,x2,*yarray,*zarray;
708   MatScalar      *v;
709   PetscErrorCode ierr;
710   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
711   PetscTruth     usecprow=a->compressedrow.use;
712 
713   PetscFunctionBegin;
714   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
715   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
716   if (zz != yy) {
717     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
718   } else {
719     zarray = yarray;
720   }
721 
722   idx = a->j;
723   v   = a->a;
724   if (usecprow){
725     if (zz != yy){
726       ierr = PetscMemcpy(zarray,yarray,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
727     }
728     mbs  = a->compressedrow.nrows;
729     ii   = a->compressedrow.i;
730     ridx = a->compressedrow.rindex;
731     if (zz != yy){
732       ierr = PetscMemcpy(zarray,yarray,a->mbs*sizeof(PetscScalar));CHKERRQ(ierr);
733     }
734   } else {
735     ii  = a->i;
736     y   = yarray;
737     z   = zarray;
738   }
739 
740   for (i=0; i<mbs; i++) {
741     n  = ii[1] - ii[0]; ii++;
742     if (usecprow){
743       z = zarray + 2*ridx[i];
744       y = yarray + 2*ridx[i];
745     }
746     sum1 = y[0]; sum2 = y[1];
747     for (j=0; j<n; j++) {
748       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
749       sum1 += v[0]*x1 + v[2]*x2;
750       sum2 += v[1]*x1 + v[3]*x2;
751       v += 4;
752     }
753     z[0] = sum1; z[1] = sum2;
754     if (!usecprow){
755       z += 2; y += 2;
756     }
757   }
758   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
759   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
760   if (zz != yy) {
761     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
762   }
763   ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
767 #undef __FUNCT__
768 #define __FUNCT__ "MatMultAdd_SeqBAIJ_3"
769 PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
770 {
771   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
772   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*yarray,*zarray;
773   MatScalar      *v;
774   PetscErrorCode ierr;
775   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
776   PetscTruth     usecprow=a->compressedrow.use;
777 
778   PetscFunctionBegin;
779   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
780   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
781   if (zz != yy) {
782     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
783   } else {
784     zarray = yarray;
785   }
786 
787   idx = a->j;
788   v   = a->a;
789   if (usecprow){
790     if (zz != yy){
791       ierr = PetscMemcpy(zarray,yarray,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
792     }
793     mbs  = a->compressedrow.nrows;
794     ii   = a->compressedrow.i;
795     ridx = a->compressedrow.rindex;
796   } else {
797     ii  = a->i;
798     y   = yarray;
799     z   = zarray;
800   }
801 
802   for (i=0; i<mbs; i++) {
803     n  = ii[1] - ii[0]; ii++;
804     if (usecprow){
805       z = zarray + 3*ridx[i];
806       y = yarray + 3*ridx[i];
807     }
808     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
809     for (j=0; j<n; j++) {
810       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
811       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
812       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
813       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
814       v += 9;
815     }
816     z[0] = sum1; z[1] = sum2; z[2] = sum3;
817     if (!usecprow){
818       z += 3; y += 3;
819     }
820   }
821   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
822   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
823   if (zz != yy) {
824     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
825   }
826   ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr);
827   PetscFunctionReturn(0);
828 }
829 
830 #undef __FUNCT__
831 #define __FUNCT__ "MatMultAdd_SeqBAIJ_4"
832 PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
833 {
834   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
835   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray;
836   MatScalar      *v;
837   PetscErrorCode ierr;
838   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
839   PetscTruth     usecprow=a->compressedrow.use;
840 
841   PetscFunctionBegin;
842   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
843   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
844   if (zz != yy) {
845     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
846   } else {
847     zarray = yarray;
848   }
849 
850   idx   = a->j;
851   v     = a->a;
852   if (usecprow){
853     if (zz != yy){
854       ierr = PetscMemcpy(zarray,yarray,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
855     }
856     mbs  = a->compressedrow.nrows;
857     ii   = a->compressedrow.i;
858     ridx = a->compressedrow.rindex;
859   } else {
860     ii  = a->i;
861     y   = yarray;
862     z   = zarray;
863   }
864 
865   for (i=0; i<mbs; i++) {
866     n  = ii[1] - ii[0]; ii++;
867     if (usecprow){
868       z = zarray + 4*ridx[i];
869       y = yarray + 4*ridx[i];
870     }
871     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
872     for (j=0; j<n; j++) {
873       xb = x + 4*(*idx++);
874       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
875       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
876       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
877       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
878       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
879       v += 16;
880     }
881     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
882     if (!usecprow){
883       z += 4; y += 4;
884     }
885   }
886   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
887   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
888   if (zz != yy) {
889     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
890   }
891   ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr);
892   PetscFunctionReturn(0);
893 }
894 
895 #undef __FUNCT__
896 #define __FUNCT__ "MatMultAdd_SeqBAIJ_5"
897 PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
898 {
899   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
900   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
901   PetscScalar    *yarray,*zarray;
902   MatScalar      *v;
903   PetscErrorCode ierr;
904   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
905   PetscTruth     usecprow=a->compressedrow.use;
906 
907   PetscFunctionBegin;
908   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
909   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
910   if (zz != yy) {
911     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
912   } else {
913     zarray = yarray;
914   }
915 
916   idx = a->j;
917   v   = a->a;
918   if (usecprow){
919     if (zz != yy){
920       ierr = PetscMemcpy(zarray,yarray,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
921     }
922     mbs  = a->compressedrow.nrows;
923     ii   = a->compressedrow.i;
924     ridx = a->compressedrow.rindex;
925   } else {
926     ii  = a->i;
927     y   = yarray;
928     z   = zarray;
929   }
930 
931   for (i=0; i<mbs; i++) {
932     n  = ii[1] - ii[0]; ii++;
933     if (usecprow){
934       z = zarray + 5*ridx[i];
935       y = yarray + 5*ridx[i];
936     }
937     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
938     for (j=0; j<n; j++) {
939       xb = x + 5*(*idx++);
940       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
941       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
942       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
943       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
944       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
945       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
946       v += 25;
947     }
948     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
949     if (!usecprow){
950       z += 5; y += 5;
951     }
952   }
953   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
954   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
955   if (zz != yy) {
956     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
957   }
958   ierr = PetscLogFlops(50*a->nz);CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 #undef __FUNCT__
962 #define __FUNCT__ "MatMultAdd_SeqBAIJ_6"
963 PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
964 {
965   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
966   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
967   PetscScalar    x1,x2,x3,x4,x5,x6,*yarray,*zarray;
968   MatScalar      *v;
969   PetscErrorCode ierr;
970   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
971   PetscTruth     usecprow=a->compressedrow.use;
972 
973   PetscFunctionBegin;
974   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
975   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
976   if (zz != yy) {
977     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
978   } else {
979     zarray = yarray;
980   }
981 
982   idx = a->j;
983   v   = a->a;
984   if (usecprow){
985     if (zz != yy){
986       ierr = PetscMemcpy(zarray,yarray,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
987     }
988     mbs  = a->compressedrow.nrows;
989     ii   = a->compressedrow.i;
990     ridx = a->compressedrow.rindex;
991   } else {
992     ii  = a->i;
993     y   = yarray;
994     z   = zarray;
995   }
996 
997   for (i=0; i<mbs; i++) {
998     n  = ii[1] - ii[0]; ii++;
999     if (usecprow){
1000       z = zarray + 6*ridx[i];
1001       y = yarray + 6*ridx[i];
1002     }
1003     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5];
1004     for (j=0; j<n; j++) {
1005       xb = x + 6*(*idx++);
1006       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
1007       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
1008       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
1009       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
1010       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
1011       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
1012       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
1013       v += 36;
1014     }
1015     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
1016     if (!usecprow){
1017       z += 6; y += 6;
1018     }
1019   }
1020   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1021   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
1022   if (zz != yy) {
1023     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1024   }
1025   ierr = PetscLogFlops(72*a->nz);CHKERRQ(ierr);
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 #undef __FUNCT__
1030 #define __FUNCT__ "MatMultAdd_SeqBAIJ_7"
1031 PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1032 {
1033   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1034   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1035   PetscScalar    x1,x2,x3,x4,x5,x6,x7,*yarray,*zarray;
1036   MatScalar      *v;
1037   PetscErrorCode ierr;
1038   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
1039   PetscTruth     usecprow=a->compressedrow.use;
1040 
1041   PetscFunctionBegin;
1042   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1043   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
1044   if (zz != yy) {
1045     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
1046   } else {
1047     zarray = yarray;
1048   }
1049 
1050   idx = a->j;
1051   v   = a->a;
1052   if (usecprow){
1053     if (zz != yy){
1054       ierr = PetscMemcpy(zarray,yarray,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1055     }
1056     mbs  = a->compressedrow.nrows;
1057     ii   = a->compressedrow.i;
1058     ridx = a->compressedrow.rindex;
1059   } else {
1060     ii  = a->i;
1061     y   = yarray;
1062     z   = zarray;
1063   }
1064 
1065   for (i=0; i<mbs; i++) {
1066     n  = ii[1] - ii[0]; ii++;
1067     if (usecprow){
1068       z = zarray + 7*ridx[i];
1069       y = yarray + 7*ridx[i];
1070     }
1071     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
1072     for (j=0; j<n; j++) {
1073       xb = x + 7*(*idx++);
1074       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1075       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1076       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1077       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1078       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1079       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1080       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1081       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1082       v += 49;
1083     }
1084     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1085     if (!usecprow){
1086       z += 7; y += 7;
1087     }
1088   }
1089   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1090   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
1091   if (zz != yy) {
1092     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1093   }
1094   ierr = PetscLogFlops(98*a->nz);CHKERRQ(ierr);
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 #undef __FUNCT__
1099 #define __FUNCT__ "MatMultAdd_SeqBAIJ_N"
1100 PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1101 {
1102   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1103   PetscScalar    *x,*z = 0,*xb,*work,*workt,*zarray;
1104   MatScalar      *v;
1105   PetscErrorCode ierr;
1106   PetscInt       mbs,i,*idx,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2;
1107   PetscInt       ncols,k,*ridx=PETSC_NULL;
1108   PetscTruth     usecprow=a->compressedrow.use;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
1112   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1113   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
1114 
1115   idx = a->j;
1116   v   = a->a;
1117   if (usecprow){
1118     mbs    = a->compressedrow.nrows;
1119     ii     = a->compressedrow.i;
1120     ridx = a->compressedrow.rindex;
1121   } else {
1122     mbs = a->mbs;
1123     ii  = a->i;
1124     z   = zarray;
1125   }
1126 
1127   if (!a->mult_work) {
1128     k    = PetscMax(A->rmap.n,A->cmap.n);
1129     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1130   }
1131   work = a->mult_work;
1132   for (i=0; i<mbs; i++) {
1133     n     = ii[1] - ii[0]; ii++;
1134     ncols = n*bs;
1135     workt = work;
1136     for (j=0; j<n; j++) {
1137       xb = x + bs*(*idx++);
1138       for (k=0; k<bs; k++) workt[k] = xb[k];
1139       workt += bs;
1140     }
1141     if (usecprow) z = zarray + bs*ridx[i];
1142     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
1143     /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */
1144     v += n*bs2;
1145     if (!usecprow){
1146       z += bs;
1147     }
1148   }
1149   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1150   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1151   ierr = PetscLogFlops(2*a->nz*bs2);CHKERRQ(ierr);
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatMultTranspose_SeqBAIJ"
1157 PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz)
1158 {
1159   PetscScalar    zero = 0.0;
1160   PetscErrorCode ierr;
1161 
1162   PetscFunctionBegin;
1163   ierr = VecSet(zz,zero);CHKERRQ(ierr);
1164   ierr = MatMultTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr);
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 #undef __FUNCT__
1169 #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ"
1170 PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1171 
1172 {
1173   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1174   PetscScalar       *zb,*x,*z,*xb = 0,x1,x2,x3,x4,x5;
1175   MatScalar         *v;
1176   PetscErrorCode    ierr;
1177   PetscInt          mbs,i,*idx,*ii,rval,bs=A->rmap.bs,j,n,bs2=a->bs2,*ib,*ridx=PETSC_NULL;
1178   Mat_CompressedRow cprow = a->compressedrow;
1179   PetscTruth        usecprow=cprow.use;
1180 
1181   PetscFunctionBegin;
1182   if (yy != zz) { ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); }
1183   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1184   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
1185 
1186   idx = a->j;
1187   v   = a->a;
1188   if (usecprow){
1189     mbs  = cprow.nrows;
1190     ii   = cprow.i;
1191     ridx = cprow.rindex;
1192   } else {
1193     mbs=a->mbs;
1194     ii = a->i;
1195     xb = x;
1196   }
1197 
1198   switch (bs) {
1199   case 1:
1200     for (i=0; i<mbs; i++) {
1201       if (usecprow) xb = x + ridx[i];
1202       x1 = xb[0];
1203       ib = idx + ii[0];
1204       n  = ii[1] - ii[0]; ii++;
1205       for (j=0; j<n; j++) {
1206         rval    = ib[j];
1207         z[rval] += *v * x1;
1208         v++;
1209       }
1210       if (!usecprow) xb++;
1211     }
1212     break;
1213   case 2:
1214     for (i=0; i<mbs; i++) {
1215       if (usecprow) xb = x + 2*ridx[i];
1216       x1 = xb[0]; x2 = xb[1];
1217       ib = idx + ii[0];
1218       n  = ii[1] - ii[0]; ii++;
1219       for (j=0; j<n; j++) {
1220         rval      = ib[j]*2;
1221         z[rval++] += v[0]*x1 + v[1]*x2;
1222         z[rval++] += v[2]*x1 + v[3]*x2;
1223         v  += 4;
1224       }
1225       if (!usecprow) xb += 2;
1226     }
1227     break;
1228   case 3:
1229     for (i=0; i<mbs; i++) {
1230       if (usecprow) xb = x + 3*ridx[i];
1231       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1232       ib = idx + ii[0];
1233       n  = ii[1] - ii[0]; ii++;
1234       for (j=0; j<n; j++) {
1235         rval      = ib[j]*3;
1236         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1237         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1238         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1239         v  += 9;
1240       }
1241       if (!usecprow) xb += 3;
1242     }
1243     break;
1244   case 4:
1245     for (i=0; i<mbs; i++) {
1246       if (usecprow) xb = x + 4*ridx[i];
1247       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1248       ib = idx + ii[0];
1249       n  = ii[1] - ii[0]; ii++;
1250       for (j=0; j<n; j++) {
1251         rval      = ib[j]*4;
1252         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1253         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1254         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1255         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1256         v  += 16;
1257       }
1258       if (!usecprow) xb += 4;
1259     }
1260     break;
1261   case 5:
1262     for (i=0; i<mbs; i++) {
1263       if (usecprow) xb = x + 5*ridx[i];
1264       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1265       x4 = xb[3]; x5 = xb[4];
1266       ib = idx + ii[0];
1267       n  = ii[1] - ii[0]; ii++;
1268       for (j=0; j<n; j++) {
1269         rval      = ib[j]*5;
1270         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1271         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1272         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1273         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1274         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1275         v  += 25;
1276       }
1277       if (!usecprow) xb += 5;
1278     }
1279     break;
1280   default: {      /* block sizes larger then 5 by 5 are handled by BLAS */
1281       PetscInt     ncols,k;
1282       PetscScalar  *work,*workt,*xtmp;
1283 
1284       if (!a->mult_work) {
1285         k = PetscMax(A->rmap.n,A->cmap.n);
1286         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1287       }
1288       work = a->mult_work;
1289       xtmp = x;
1290       for (i=0; i<mbs; i++) {
1291         n     = ii[1] - ii[0]; ii++;
1292         ncols = n*bs;
1293         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1294         if (usecprow) {
1295           xtmp = x + bs*ridx[i];
1296         }
1297         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work);
1298         /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */
1299         v += n*bs2;
1300         if (!usecprow) xtmp += bs;
1301         workt = work;
1302         for (j=0; j<n; j++) {
1303           zb = z + bs*(*idx++);
1304           for (k=0; k<bs; k++) zb[k] += workt[k] ;
1305           workt += bs;
1306         }
1307       }
1308     }
1309   }
1310   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1311   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
1312   ierr = PetscLogFlops(2*a->nz*a->bs2);CHKERRQ(ierr);
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 #undef __FUNCT__
1317 #define __FUNCT__ "MatScale_SeqBAIJ"
1318 PetscErrorCode MatScale_SeqBAIJ(Mat inA,PetscScalar alpha)
1319 {
1320   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)inA->data;
1321   PetscInt     totalnz = a->bs2*a->nz;
1322   PetscScalar  oalpha = alpha;
1323   PetscErrorCode ierr;
1324 #if defined(PETSC_USE_MAT_SINGLE)
1325   PetscInt     i;
1326 #else
1327   PetscBLASInt tnz = (PetscBLASInt) totalnz,one = 1;
1328 #endif
1329 
1330   PetscFunctionBegin;
1331 #if defined(PETSC_USE_MAT_SINGLE)
1332   for (i=0; i<totalnz; i++) a->a[i] *= oalpha;
1333 #else
1334   BLASscal_(&tnz,&oalpha,a->a,&one);
1335 #endif
1336   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatNorm_SeqBAIJ"
1342 PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm)
1343 {
1344   PetscErrorCode ierr;
1345   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1346   MatScalar      *v = a->a;
1347   PetscReal      sum = 0.0;
1348   PetscInt       i,j,k,bs=A->rmap.bs,nz=a->nz,bs2=a->bs2,k1;
1349 
1350   PetscFunctionBegin;
1351   if (type == NORM_FROBENIUS) {
1352     for (i=0; i< bs2*nz; i++) {
1353 #if defined(PETSC_USE_COMPLEX)
1354       sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1355 #else
1356       sum += (*v)*(*v); v++;
1357 #endif
1358     }
1359     *norm = sqrt(sum);
1360   } else if (type == NORM_1) { /* maximum column sum */
1361     PetscReal *tmp;
1362     PetscInt  *bcol = a->j;
1363     ierr = PetscMalloc((A->cmap.n+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1364     ierr = PetscMemzero(tmp,A->cmap.n*sizeof(PetscReal));CHKERRQ(ierr);
1365     for (i=0; i<nz; i++){
1366       for (j=0; j<bs; j++){
1367         k1 = bs*(*bcol) + j; /* column index */
1368         for (k=0; k<bs; k++){
1369           tmp[k1] += PetscAbsScalar(*v); v++;
1370         }
1371       }
1372       bcol++;
1373     }
1374     *norm = 0.0;
1375     for (j=0; j<A->cmap.n; j++) {
1376       if (tmp[j] > *norm) *norm = tmp[j];
1377     }
1378     ierr = PetscFree(tmp);CHKERRQ(ierr);
1379   } else if (type == NORM_INFINITY) { /* maximum row sum */
1380     *norm = 0.0;
1381     for (k=0; k<bs; k++) {
1382       for (j=0; j<a->mbs; j++) {
1383         v = a->a + bs2*a->i[j] + k;
1384         sum = 0.0;
1385         for (i=0; i<a->i[j+1]-a->i[j]; i++) {
1386           for (k1=0; k1<bs; k1++){
1387             sum += PetscAbsScalar(*v);
1388             v   += bs;
1389           }
1390         }
1391         if (sum > *norm) *norm = sum;
1392       }
1393     }
1394   } else {
1395     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1396   }
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 
1401 #undef __FUNCT__
1402 #define __FUNCT__ "MatEqual_SeqBAIJ"
1403 PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg)
1404 {
1405   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data;
1406   PetscErrorCode ierr;
1407 
1408   PetscFunctionBegin;
1409   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1410   if ((A->rmap.N != B->rmap.N) || (A->cmap.n != B->cmap.n) || (A->rmap.bs != B->rmap.bs)|| (a->nz != b->nz)) {
1411     *flg = PETSC_FALSE;
1412     PetscFunctionReturn(0);
1413   }
1414 
1415   /* if the a->i are the same */
1416   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1417   if (!*flg) {
1418     PetscFunctionReturn(0);
1419   }
1420 
1421   /* if a->j are the same */
1422   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1423   if (!*flg) {
1424     PetscFunctionReturn(0);
1425   }
1426   /* if a->a are the same */
1427   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap.bs)*(B->rmap.bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1428   PetscFunctionReturn(0);
1429 
1430 }
1431 
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatGetDiagonal_SeqBAIJ"
1434 PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1435 {
1436   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1437   PetscErrorCode ierr;
1438   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1439   PetscScalar    *x,zero = 0.0;
1440   MatScalar      *aa,*aa_j;
1441 
1442   PetscFunctionBegin;
1443   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1444   bs   = A->rmap.bs;
1445   aa   = a->a;
1446   ai   = a->i;
1447   aj   = a->j;
1448   ambs = a->mbs;
1449   bs2  = a->bs2;
1450 
1451   ierr = VecSet(v,zero);CHKERRQ(ierr);
1452   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1453   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1454   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1455   for (i=0; i<ambs; i++) {
1456     for (j=ai[i]; j<ai[i+1]; j++) {
1457       if (aj[j] == i) {
1458         row  = i*bs;
1459         aa_j = aa+j*bs2;
1460         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1461         break;
1462       }
1463     }
1464   }
1465   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1466   PetscFunctionReturn(0);
1467 }
1468 
1469 #undef __FUNCT__
1470 #define __FUNCT__ "MatDiagonalScale_SeqBAIJ"
1471 PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1472 {
1473   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1474   PetscScalar    *l,*r,x,*li,*ri;
1475   MatScalar      *aa,*v;
1476   PetscErrorCode ierr;
1477   PetscInt       i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1478 
1479   PetscFunctionBegin;
1480   ai  = a->i;
1481   aj  = a->j;
1482   aa  = a->a;
1483   m   = A->rmap.n;
1484   n   = A->cmap.n;
1485   bs  = A->rmap.bs;
1486   mbs = a->mbs;
1487   bs2 = a->bs2;
1488   if (ll) {
1489     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
1490     ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1491     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1492     for (i=0; i<mbs; i++) { /* for each block row */
1493       M  = ai[i+1] - ai[i];
1494       li = l + i*bs;
1495       v  = aa + bs2*ai[i];
1496       for (j=0; j<M; j++) { /* for each block */
1497         for (k=0; k<bs2; k++) {
1498           (*v++) *= li[k%bs];
1499         }
1500       }
1501     }
1502     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1503     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1504   }
1505 
1506   if (rr) {
1507     ierr = VecGetArray(rr,&r);CHKERRQ(ierr);
1508     ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr);
1509     if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
1510     for (i=0; i<mbs; i++) { /* for each block row */
1511       M  = ai[i+1] - ai[i];
1512       v  = aa + bs2*ai[i];
1513       for (j=0; j<M; j++) { /* for each block */
1514         ri = r + bs*aj[ai[i]+j];
1515         for (k=0; k<bs; k++) {
1516           x = ri[k];
1517           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
1518         }
1519       }
1520     }
1521     ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr);
1522     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1523   }
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 
1528 #undef __FUNCT__
1529 #define __FUNCT__ "MatGetInfo_SeqBAIJ"
1530 PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1531 {
1532   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1533 
1534   PetscFunctionBegin;
1535   info->rows_global    = (double)A->rmap.N;
1536   info->columns_global = (double)A->cmap.n;
1537   info->rows_local     = (double)A->rmap.n;
1538   info->columns_local  = (double)A->cmap.n;
1539   info->block_size     = a->bs2;
1540   info->nz_allocated   = a->maxnz;
1541   info->nz_used        = a->bs2*a->nz;
1542   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1543   info->assemblies   = A->num_ass;
1544   info->mallocs      = a->reallocs;
1545   info->memory       = ((PetscObject)A)->mem;
1546   if (A->factor) {
1547     info->fill_ratio_given  = A->info.fill_ratio_given;
1548     info->fill_ratio_needed = A->info.fill_ratio_needed;
1549     info->factor_mallocs    = A->info.factor_mallocs;
1550   } else {
1551     info->fill_ratio_given  = 0;
1552     info->fill_ratio_needed = 0;
1553     info->factor_mallocs    = 0;
1554   }
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MatZeroEntries_SeqBAIJ"
1561 PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A)
1562 {
1563   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1564   PetscErrorCode ierr;
1565 
1566   PetscFunctionBegin;
1567   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
1568   PetscFunctionReturn(0);
1569 }
1570