xref: /petsc/src/mat/impls/baij/seq/baij2.c (revision d4bf62d159b4eec940d9872c577cf78ea329f539) !
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/seq/baij.h"
4 #include "../src/mat/blockinvert.h"
5 #include "petscbt.h"
6 
7 #undef __FUNCT__
8 #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ"
9 PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
10 {
11   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
12   PetscErrorCode ierr;
13   PetscInt       row,i,j,k,l,m,n,*nidx,isz,val,ival;
14   const PetscInt *idx;
15   PetscInt       start,end,*ai,*aj,bs,*nidx2;
16   PetscBT        table;
17 
18   PetscFunctionBegin;
19   m     = a->mbs;
20   ai    = a->i;
21   aj    = a->j;
22   bs    = A->rmap->bs;
23 
24   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
25 
26   ierr = PetscBTCreate(m,table);CHKERRQ(ierr);
27   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr);
28   ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr);
29 
30   for (i=0; i<is_max; i++) {
31     /* Initialise the two local arrays */
32     isz  = 0;
33     ierr = PetscBTMemzero(m,table);CHKERRQ(ierr);
34 
35     /* Extract the indices, assume there can be duplicate entries */
36     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
37     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
38 
39     /* Enter these into the temp arrays i.e mark table[row], enter row into new index */
40     for (j=0; j<n ; ++j){
41       ival = idx[j]/bs; /* convert the indices into block indices */
42       if (ival>=m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
43       if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;}
44     }
45     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
46     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
47 
48     k = 0;
49     for (j=0; j<ov; j++){ /* for each overlap*/
50       n = isz;
51       for (; k<n ; k++){ /* do only those rows in nidx[k], which are not done yet */
52         row   = nidx[k];
53         start = ai[row];
54         end   = ai[row+1];
55         for (l = start; l<end ; l++){
56           val = aj[l];
57           if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;}
58         }
59       }
60     }
61     /* expand the Index Set */
62     for (j=0; j<isz; j++) {
63       for (k=0; k<bs; k++)
64         nidx2[j*bs+k] = nidx[j]*bs+k;
65     }
66     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);CHKERRQ(ierr);
67   }
68   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
69   ierr = PetscFree(nidx);CHKERRQ(ierr);
70   ierr = PetscFree(nidx2);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private"
76 PetscErrorCode MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
77 {
78   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*c;
79   PetscErrorCode ierr;
80   PetscInt       *smap,i,k,kstart,kend,oldcols = a->nbs,*lens;
81   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
82   const PetscInt *irow,*icol;
83   PetscInt       nrows,ncols,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
84   PetscInt       *aj = a->j,*ai = a->i;
85   MatScalar      *mat_a;
86   Mat            C;
87   PetscTruth     flag,sorted;
88 
89   PetscFunctionBegin;
90   ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr);
91   if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
92 
93   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
94   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
95   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
96   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
97 
98   ierr = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr);
99   ssmap = smap;
100   ierr = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
101   ierr  = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr);
102   for (i=0; i<ncols; i++) smap[icol[i]] = i+1;
103   /* determine lens of each row */
104   for (i=0; i<nrows; i++) {
105     kstart  = ai[irow[i]];
106     kend    = kstart + a->ilen[irow[i]];
107     lens[i] = 0;
108       for (k=kstart; k<kend; k++) {
109         if (ssmap[aj[k]]) {
110           lens[i]++;
111         }
112       }
113     }
114   /* Create and fill new matrix */
115   if (scall == MAT_REUSE_MATRIX) {
116     c = (Mat_SeqBAIJ *)((*B)->data);
117 
118     if (c->mbs!=nrows || c->nbs!=ncols || (*B)->rmap->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
119     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr);
120     if (!flag) {
121       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
122     }
123     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr);
124     C = *B;
125   } else {
126     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
127     ierr = MatSetSizes(C,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
128     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
129     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,lens);CHKERRQ(ierr);
130   }
131   c = (Mat_SeqBAIJ *)(C->data);
132   for (i=0; i<nrows; i++) {
133     row    = irow[i];
134     kstart = ai[row];
135     kend   = kstart + a->ilen[row];
136     mat_i  = c->i[i];
137     mat_j  = c->j + mat_i;
138     mat_a  = c->a + mat_i*bs2;
139     mat_ilen = c->ilen + i;
140     for (k=kstart; k<kend; k++) {
141       if ((tcol=ssmap[a->j[k]])) {
142         *mat_j++ = tcol - 1;
143         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
144         mat_a   += bs2;
145         (*mat_ilen)++;
146       }
147     }
148   }
149 
150   /* Free work space */
151   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
152   ierr = PetscFree(smap);CHKERRQ(ierr);
153   ierr = PetscFree(lens);CHKERRQ(ierr);
154   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
155   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
156 
157   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
158   *B = C;
159   PetscFunctionReturn(0);
160 }
161 
162 #undef __FUNCT__
163 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ"
164 PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
165 {
166   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
167   IS             is1,is2;
168   PetscErrorCode ierr;
169   PetscInt       *vary,*iary,nrows,ncols,i,bs=A->rmap->bs,count;
170   const PetscInt *irow,*icol;
171 
172   PetscFunctionBegin;
173   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
174   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
175   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
176   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
177 
178   /* Verify if the indices corespond to each element in a block
179    and form the IS with compressed IS */
180   ierr = PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);CHKERRQ(ierr);
181   iary = vary + a->mbs;
182   ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr);
183   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
184   count = 0;
185   for (i=0; i<a->mbs; i++) {
186     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Index set does not match blocks");
187     if (vary[i]==bs) iary[count++] = i;
188   }
189   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);CHKERRQ(ierr);
190 
191   ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr);
192   for (i=0; i<ncols; i++) vary[icol[i]/bs]++;
193   count = 0;
194   for (i=0; i<a->mbs; i++) {
195     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_PLIB,"Internal error in PETSc");
196     if (vary[i]==bs) iary[count++] = i;
197   }
198   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is2);CHKERRQ(ierr);
199   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
200   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
201   ierr = PetscFree(vary);CHKERRQ(ierr);
202 
203   ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,scall,B);CHKERRQ(ierr);
204   ISDestroy(is1);
205   ISDestroy(is2);
206   PetscFunctionReturn(0);
207 }
208 
209 #undef __FUNCT__
210 #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ"
211 PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
212 {
213   PetscErrorCode ierr;
214   PetscInt       i;
215 
216   PetscFunctionBegin;
217   if (scall == MAT_INITIAL_MATRIX) {
218     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
219   }
220 
221   for (i=0; i<n; i++) {
222     ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);CHKERRQ(ierr);
223   }
224   PetscFunctionReturn(0);
225 }
226 
227 
228 /* -------------------------------------------------------*/
229 /* Should check that shapes of vectors and matrices match */
230 /* -------------------------------------------------------*/
231 #include "petscblaslapack.h"
232 
233 #undef __FUNCT__
234 #define __FUNCT__ "MatMult_SeqBAIJ_1"
235 PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
236 {
237   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
238   PetscScalar       *z,sum;
239   const PetscScalar *x;
240   const MatScalar   *v;
241   PetscErrorCode    ierr;
242   PetscInt          mbs,i,*idx,*ii,n,*ridx=PETSC_NULL,nonzerorow=0;
243   PetscTruth        usecprow=a->compressedrow.use;
244 
245   PetscFunctionBegin;
246   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
247   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
248 
249   idx = a->j;
250   v   = a->a;
251   if (usecprow){
252     mbs  = a->compressedrow.nrows;
253     ii   = a->compressedrow.i;
254     ridx = a->compressedrow.rindex;
255   } else {
256     mbs = a->mbs;
257     ii  = a->i;
258   }
259 
260   for (i=0; i<mbs; i++) {
261     n    = ii[1] - ii[0]; ii++;
262     sum  = 0.0;
263     nonzerorow += (n>0);
264     while (n--) sum += *v++ * x[*idx++];
265     if (usecprow){
266       z[ridx[i]] = sum;
267     } else {
268       z[i] = sum;
269     }
270   }
271   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
272   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
273   ierr = PetscLogFlops(2.0*a->nz - nonzerorow);CHKERRQ(ierr);
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "MatMult_SeqBAIJ_2"
279 PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
280 {
281   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
282   PetscScalar       *z = 0,sum1,sum2,*zarray;
283   const PetscScalar *x,*xb;
284   PetscScalar       x1,x2;
285   const MatScalar   *v;
286   PetscErrorCode    ierr;
287   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
288   PetscTruth        usecprow=a->compressedrow.use;
289 
290   PetscFunctionBegin;
291   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
292   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
293 
294   idx = a->j;
295   v   = a->a;
296   if (usecprow){
297     mbs  = a->compressedrow.nrows;
298     ii   = a->compressedrow.i;
299     ridx = a->compressedrow.rindex;
300   } else {
301     mbs = a->mbs;
302     ii  = a->i;
303     z   = zarray;
304   }
305 
306   for (i=0; i<mbs; i++) {
307     n  = ii[1] - ii[0]; ii++;
308     sum1 = 0.0; sum2 = 0.0;
309     nonzerorow += (n>0);
310     for (j=0; j<n; j++) {
311       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
312       sum1 += v[0]*x1 + v[2]*x2;
313       sum2 += v[1]*x1 + v[3]*x2;
314       v += 4;
315     }
316     if (usecprow) z = zarray + 2*ridx[i];
317     z[0] = sum1; z[1] = sum2;
318     if (!usecprow) z += 2;
319   }
320   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
321   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
322   ierr = PetscLogFlops(8.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
323   PetscFunctionReturn(0);
324 }
325 
326 #undef __FUNCT__
327 #define __FUNCT__ "MatMult_SeqBAIJ_3"
328 PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
329 {
330   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
331   PetscScalar       *z = 0,sum1,sum2,sum3,x1,x2,x3,*zarray;
332   const PetscScalar *x,*xb;
333   const MatScalar   *v;
334   PetscErrorCode    ierr;
335   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
336   PetscTruth        usecprow=a->compressedrow.use;
337 
338 
339 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
340 #pragma disjoint(*v,*z,*xb)
341 #endif
342 
343   PetscFunctionBegin;
344   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
345   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
346 
347   idx = a->j;
348   v   = a->a;
349   if (usecprow){
350     mbs  = a->compressedrow.nrows;
351     ii   = a->compressedrow.i;
352     ridx = a->compressedrow.rindex;
353   } else {
354     mbs = a->mbs;
355     ii  = a->i;
356     z   = zarray;
357   }
358 
359   for (i=0; i<mbs; i++) {
360     n  = ii[1] - ii[0]; ii++;
361     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
362     nonzerorow += (n>0);
363     for (j=0; j<n; j++) {
364       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
365       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
366       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
367       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
368       v += 9;
369     }
370     if (usecprow) z = zarray + 3*ridx[i];
371     z[0] = sum1; z[1] = sum2; z[2] = sum3;
372     if (!usecprow) z += 3;
373   }
374   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
375   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
376   ierr = PetscLogFlops(18.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
377   PetscFunctionReturn(0);
378 }
379 
380 #undef __FUNCT__
381 #define __FUNCT__ "MatMult_SeqBAIJ_4"
382 PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
383 {
384   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
385   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray;
386   const PetscScalar *x,*xb;
387   const MatScalar   *v;
388   PetscErrorCode    ierr;
389   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
390   PetscTruth        usecprow=a->compressedrow.use;
391 
392   PetscFunctionBegin;
393   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
394   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
395 
396   idx = a->j;
397   v   = a->a;
398   if (usecprow){
399     mbs  = a->compressedrow.nrows;
400     ii   = a->compressedrow.i;
401     ridx = a->compressedrow.rindex;
402   } else {
403     mbs = a->mbs;
404     ii  = a->i;
405     z   = zarray;
406   }
407 
408   for (i=0; i<mbs; i++) {
409     n  = ii[1] - ii[0]; ii++;
410     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
411     nonzerorow += (n>0);
412     for (j=0; j<n; j++) {
413       xb = x + 4*(*idx++);
414       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
415       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
416       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
417       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
418       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
419       v += 16;
420     }
421     if (usecprow) z = zarray + 4*ridx[i];
422     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
423     if (!usecprow) z += 4;
424   }
425   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
426   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
427   ierr = PetscLogFlops(32.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
428   PetscFunctionReturn(0);
429 }
430 
431 #undef __FUNCT__
432 #define __FUNCT__ "MatMult_SeqBAIJ_5"
433 PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
434 {
435   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
436   PetscScalar       sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*z = 0,*zarray;
437   const PetscScalar *xb,*x;
438   const MatScalar   *v;
439   PetscErrorCode    ierr;
440   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
441   PetscTruth        usecprow=a->compressedrow.use;
442 
443   PetscFunctionBegin;
444   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
445   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
446 
447   idx = a->j;
448   v   = a->a;
449   if (usecprow){
450     mbs  = a->compressedrow.nrows;
451     ii   = a->compressedrow.i;
452     ridx = a->compressedrow.rindex;
453   } else {
454     mbs = a->mbs;
455     ii  = a->i;
456     z   = zarray;
457   }
458 
459   for (i=0; i<mbs; i++) {
460     n  = ii[1] - ii[0]; ii++;
461     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
462     nonzerorow += (n>0);
463     for (j=0; j<n; j++) {
464       xb = x + 5*(*idx++);
465       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
466       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
467       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
468       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
469       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
470       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
471       v += 25;
472     }
473     if (usecprow) z = zarray + 5*ridx[i];
474     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
475     if (!usecprow) z += 5;
476   }
477   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
478   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
479   ierr = PetscLogFlops(50.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
480   PetscFunctionReturn(0);
481 }
482 
483 
484 #undef __FUNCT__
485 #define __FUNCT__ "MatMult_SeqBAIJ_6"
486 PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz)
487 {
488   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
489   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,sum5,sum6;
490   const PetscScalar *x,*xb;
491   PetscScalar       x1,x2,x3,x4,x5,x6,*zarray;
492   const MatScalar   *v;
493   PetscErrorCode    ierr;
494   PetscInt          mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
495   PetscTruth        usecprow=a->compressedrow.use;
496 
497   PetscFunctionBegin;
498   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
499   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
500 
501   idx = a->j;
502   v   = a->a;
503   if (usecprow){
504     mbs  = a->compressedrow.nrows;
505     ii   = a->compressedrow.i;
506     ridx = a->compressedrow.rindex;
507   } else {
508     mbs = a->mbs;
509     ii  = a->i;
510     z   = zarray;
511   }
512 
513   for (i=0; i<mbs; i++) {
514     n  = ii[1] - ii[0]; ii++;
515     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0;
516     nonzerorow += (n>0);
517     for (j=0; j<n; j++) {
518       xb = x + 6*(*idx++);
519       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
520       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
521       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
522       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
523       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
524       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
525       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
526       v += 36;
527     }
528     if (usecprow) z = zarray + 6*ridx[i];
529     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
530     if (!usecprow) z += 6;
531   }
532 
533   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
534   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
535   ierr = PetscLogFlops(72.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 #undef __FUNCT__
539 #define __FUNCT__ "MatMult_SeqBAIJ_7"
540 PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
541 {
542   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
543   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
544   const PetscScalar *x,*xb;
545   PetscScalar       x1,x2,x3,x4,x5,x6,x7,*zarray;
546   const MatScalar   *v;
547   PetscErrorCode    ierr;
548   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
549   PetscTruth        usecprow=a->compressedrow.use;
550 
551   PetscFunctionBegin;
552   ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
553   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
554 
555   idx = a->j;
556   v   = a->a;
557   if (usecprow){
558     mbs    = a->compressedrow.nrows;
559     ii     = a->compressedrow.i;
560     ridx = a->compressedrow.rindex;
561   } else {
562     mbs = a->mbs;
563     ii  = a->i;
564     z   = zarray;
565   }
566 
567   for (i=0; i<mbs; i++) {
568     n  = ii[1] - ii[0]; ii++;
569     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
570     nonzerorow += (n>0);
571     for (j=0; j<n; j++) {
572       xb = x + 7*(*idx++);
573       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
574       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
575       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
576       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
577       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
578       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
579       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
580       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
581       v += 49;
582     }
583     if (usecprow) z = zarray + 7*ridx[i];
584     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
585     if (!usecprow) z += 7;
586   }
587 
588   ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
589   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
590   ierr = PetscLogFlops(98.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
591   PetscFunctionReturn(0);
592 }
593 
594 /*
595     This will not work with MatScalar == float because it calls the BLAS
596 */
597 #undef __FUNCT__
598 #define __FUNCT__ "MatMult_SeqBAIJ_N"
599 PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
600 {
601   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
602   PetscScalar    *x,*z = 0,*xb,*work,*workt,*zarray;
603   MatScalar      *v;
604   PetscErrorCode ierr;
605   PetscInt       mbs=a->mbs,i,*idx,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2;
606   PetscInt       ncols,k,*ridx=PETSC_NULL,nonzerorow=0;
607   PetscTruth     usecprow=a->compressedrow.use;
608 
609   PetscFunctionBegin;
610   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
611   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
612 
613   idx = a->j;
614   v   = a->a;
615   if (usecprow){
616     mbs  = a->compressedrow.nrows;
617     ii   = a->compressedrow.i;
618     ridx = a->compressedrow.rindex;
619   } else {
620     mbs = a->mbs;
621     ii  = a->i;
622     z   = zarray;
623   }
624 
625   if (!a->mult_work) {
626     k    = PetscMax(A->rmap->n,A->cmap->n);
627     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
628   }
629   work = a->mult_work;
630   for (i=0; i<mbs; i++) {
631     n     = ii[1] - ii[0]; ii++;
632     ncols = n*bs;
633     workt = work;
634     nonzerorow += (n>0);
635     for (j=0; j<n; j++) {
636       xb = x + bs*(*idx++);
637       for (k=0; k<bs; k++) workt[k] = xb[k];
638       workt += bs;
639     }
640     if (usecprow) z = zarray + bs*ridx[i];
641     Kernel_w_gets_Ar_times_v(bs,ncols,work,v,z);
642     /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */
643     v += n*bs2;
644     if (!usecprow) z += bs;
645   }
646   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
647   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
648   ierr = PetscLogFlops(2.0*a->nz*bs2 - bs*nonzerorow);CHKERRQ(ierr);
649   PetscFunctionReturn(0);
650 }
651 
652 extern PetscErrorCode VecCopy_Seq(Vec,Vec);
653 #undef __FUNCT__
654 #define __FUNCT__ "MatMultAdd_SeqBAIJ_1"
655 PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
656 {
657   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
658   PetscScalar    *x,*y = 0,*z = 0,sum,*yarray,*zarray;
659   MatScalar      *v;
660   PetscErrorCode ierr;
661   PetscInt       mbs=a->mbs,i,*idx,*ii,n,*ridx=PETSC_NULL;
662   PetscTruth     usecprow=a->compressedrow.use;
663 
664   PetscFunctionBegin;
665   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
666   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
667   if (zz != yy) {
668     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
669   } else {
670     zarray = yarray;
671   }
672 
673   idx = a->j;
674   v   = a->a;
675   if (usecprow){
676     if (zz != yy){
677       ierr = PetscMemcpy(zarray,yarray,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
678     }
679     mbs  = a->compressedrow.nrows;
680     ii   = a->compressedrow.i;
681     ridx = a->compressedrow.rindex;
682   } else {
683     ii  = a->i;
684     y   = yarray;
685     z   = zarray;
686   }
687 
688   for (i=0; i<mbs; i++) {
689     n    = ii[1] - ii[0]; ii++;
690     if (usecprow){
691       z = zarray + ridx[i];
692       y = yarray + ridx[i];
693     }
694     sum = y[0];
695     while (n--) sum += *v++ * x[*idx++];
696     z[0] = sum;
697     if (!usecprow){
698       z++; y++;
699     }
700   }
701   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
702   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
703   if (zz != yy) {
704     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
705   }
706   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNCT__
711 #define __FUNCT__ "MatMultAdd_SeqBAIJ_2"
712 PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
713 {
714   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
715   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2;
716   PetscScalar    x1,x2,*yarray,*zarray;
717   MatScalar      *v;
718   PetscErrorCode ierr;
719   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
720   PetscTruth     usecprow=a->compressedrow.use;
721 
722   PetscFunctionBegin;
723   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
724   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
725   if (zz != yy) {
726     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
727   } else {
728     zarray = yarray;
729   }
730 
731   idx = a->j;
732   v   = a->a;
733   if (usecprow){
734     if (zz != yy){
735       ierr = PetscMemcpy(zarray,yarray,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
736     }
737     mbs  = a->compressedrow.nrows;
738     ii   = a->compressedrow.i;
739     ridx = a->compressedrow.rindex;
740     if (zz != yy){
741       ierr = PetscMemcpy(zarray,yarray,a->mbs*sizeof(PetscScalar));CHKERRQ(ierr);
742     }
743   } else {
744     ii  = a->i;
745     y   = yarray;
746     z   = zarray;
747   }
748 
749   for (i=0; i<mbs; i++) {
750     n  = ii[1] - ii[0]; ii++;
751     if (usecprow){
752       z = zarray + 2*ridx[i];
753       y = yarray + 2*ridx[i];
754     }
755     sum1 = y[0]; sum2 = y[1];
756     for (j=0; j<n; j++) {
757       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
758       sum1 += v[0]*x1 + v[2]*x2;
759       sum2 += v[1]*x1 + v[3]*x2;
760       v += 4;
761     }
762     z[0] = sum1; z[1] = sum2;
763     if (!usecprow){
764       z += 2; y += 2;
765     }
766   }
767   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
768   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
769   if (zz != yy) {
770     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
771   }
772   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
773   PetscFunctionReturn(0);
774 }
775 
776 #undef __FUNCT__
777 #define __FUNCT__ "MatMultAdd_SeqBAIJ_3"
778 PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
779 {
780   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
781   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*yarray,*zarray;
782   MatScalar      *v;
783   PetscErrorCode ierr;
784   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
785   PetscTruth     usecprow=a->compressedrow.use;
786 
787   PetscFunctionBegin;
788   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
789   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
790   if (zz != yy) {
791     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
792   } else {
793     zarray = yarray;
794   }
795 
796   idx = a->j;
797   v   = a->a;
798   if (usecprow){
799     if (zz != yy){
800       ierr = PetscMemcpy(zarray,yarray,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
801     }
802     mbs  = a->compressedrow.nrows;
803     ii   = a->compressedrow.i;
804     ridx = a->compressedrow.rindex;
805   } else {
806     ii  = a->i;
807     y   = yarray;
808     z   = zarray;
809   }
810 
811   for (i=0; i<mbs; i++) {
812     n  = ii[1] - ii[0]; ii++;
813     if (usecprow){
814       z = zarray + 3*ridx[i];
815       y = yarray + 3*ridx[i];
816     }
817     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
818     for (j=0; j<n; j++) {
819       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
820       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
821       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
822       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
823       v += 9;
824     }
825     z[0] = sum1; z[1] = sum2; z[2] = sum3;
826     if (!usecprow){
827       z += 3; y += 3;
828     }
829   }
830   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
831   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
832   if (zz != yy) {
833     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
834   }
835   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
836   PetscFunctionReturn(0);
837 }
838 
839 #undef __FUNCT__
840 #define __FUNCT__ "MatMultAdd_SeqBAIJ_4"
841 PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
842 {
843   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
844   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray;
845   MatScalar      *v;
846   PetscErrorCode ierr;
847   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
848   PetscTruth     usecprow=a->compressedrow.use;
849 
850   PetscFunctionBegin;
851   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
852   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
853   if (zz != yy) {
854     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
855   } else {
856     zarray = yarray;
857   }
858 
859   idx   = a->j;
860   v     = a->a;
861   if (usecprow){
862     if (zz != yy){
863       ierr = PetscMemcpy(zarray,yarray,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
864     }
865     mbs  = a->compressedrow.nrows;
866     ii   = a->compressedrow.i;
867     ridx = a->compressedrow.rindex;
868   } else {
869     ii  = a->i;
870     y   = yarray;
871     z   = zarray;
872   }
873 
874   for (i=0; i<mbs; i++) {
875     n  = ii[1] - ii[0]; ii++;
876     if (usecprow){
877       z = zarray + 4*ridx[i];
878       y = yarray + 4*ridx[i];
879     }
880     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
881     for (j=0; j<n; j++) {
882       xb = x + 4*(*idx++);
883       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
884       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
885       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
886       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
887       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
888       v += 16;
889     }
890     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
891     if (!usecprow){
892       z += 4; y += 4;
893     }
894   }
895   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
896   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
897   if (zz != yy) {
898     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
899   }
900   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatMultAdd_SeqBAIJ_5"
906 PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
907 {
908   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
909   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
910   PetscScalar    *yarray,*zarray;
911   MatScalar      *v;
912   PetscErrorCode ierr;
913   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
914   PetscTruth     usecprow=a->compressedrow.use;
915 
916   PetscFunctionBegin;
917   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
918   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
919   if (zz != yy) {
920     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
921   } else {
922     zarray = yarray;
923   }
924 
925   idx = a->j;
926   v   = a->a;
927   if (usecprow){
928     if (zz != yy){
929       ierr = PetscMemcpy(zarray,yarray,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
930     }
931     mbs  = a->compressedrow.nrows;
932     ii   = a->compressedrow.i;
933     ridx = a->compressedrow.rindex;
934   } else {
935     ii  = a->i;
936     y   = yarray;
937     z   = zarray;
938   }
939 
940   for (i=0; i<mbs; i++) {
941     n  = ii[1] - ii[0]; ii++;
942     if (usecprow){
943       z = zarray + 5*ridx[i];
944       y = yarray + 5*ridx[i];
945     }
946     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
947     for (j=0; j<n; j++) {
948       xb = x + 5*(*idx++);
949       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
950       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
951       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
952       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
953       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
954       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
955       v += 25;
956     }
957     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
958     if (!usecprow){
959       z += 5; y += 5;
960     }
961   }
962   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
963   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
964   if (zz != yy) {
965     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
966   }
967   ierr = PetscLogFlops(50.0*a->nz);CHKERRQ(ierr);
968   PetscFunctionReturn(0);
969 }
970 #undef __FUNCT__
971 #define __FUNCT__ "MatMultAdd_SeqBAIJ_6"
972 PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
973 {
974   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
975   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
976   PetscScalar    x1,x2,x3,x4,x5,x6,*yarray,*zarray;
977   MatScalar      *v;
978   PetscErrorCode ierr;
979   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
980   PetscTruth     usecprow=a->compressedrow.use;
981 
982   PetscFunctionBegin;
983   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
984   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
985   if (zz != yy) {
986     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
987   } else {
988     zarray = yarray;
989   }
990 
991   idx = a->j;
992   v   = a->a;
993   if (usecprow){
994     if (zz != yy){
995       ierr = PetscMemcpy(zarray,yarray,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
996     }
997     mbs  = a->compressedrow.nrows;
998     ii   = a->compressedrow.i;
999     ridx = a->compressedrow.rindex;
1000   } else {
1001     ii  = a->i;
1002     y   = yarray;
1003     z   = zarray;
1004   }
1005 
1006   for (i=0; i<mbs; i++) {
1007     n  = ii[1] - ii[0]; ii++;
1008     if (usecprow){
1009       z = zarray + 6*ridx[i];
1010       y = yarray + 6*ridx[i];
1011     }
1012     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5];
1013     for (j=0; j<n; j++) {
1014       xb = x + 6*(*idx++);
1015       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
1016       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
1017       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
1018       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
1019       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
1020       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
1021       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
1022       v += 36;
1023     }
1024     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
1025     if (!usecprow){
1026       z += 6; y += 6;
1027     }
1028   }
1029   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1030   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
1031   if (zz != yy) {
1032     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1033   }
1034   ierr = PetscLogFlops(72.0*a->nz);CHKERRQ(ierr);
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 #undef __FUNCT__
1039 #define __FUNCT__ "MatMultAdd_SeqBAIJ_7"
1040 PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1041 {
1042   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1043   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1044   PetscScalar    x1,x2,x3,x4,x5,x6,x7,*yarray,*zarray;
1045   MatScalar      *v;
1046   PetscErrorCode ierr;
1047   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
1048   PetscTruth     usecprow=a->compressedrow.use;
1049 
1050   PetscFunctionBegin;
1051   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1052   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
1053   if (zz != yy) {
1054     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
1055   } else {
1056     zarray = yarray;
1057   }
1058 
1059   idx = a->j;
1060   v   = a->a;
1061   if (usecprow){
1062     if (zz != yy){
1063       ierr = PetscMemcpy(zarray,yarray,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1064     }
1065     mbs  = a->compressedrow.nrows;
1066     ii   = a->compressedrow.i;
1067     ridx = a->compressedrow.rindex;
1068   } else {
1069     ii  = a->i;
1070     y   = yarray;
1071     z   = zarray;
1072   }
1073 
1074   for (i=0; i<mbs; i++) {
1075     n  = ii[1] - ii[0]; ii++;
1076     if (usecprow){
1077       z = zarray + 7*ridx[i];
1078       y = yarray + 7*ridx[i];
1079     }
1080     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
1081     for (j=0; j<n; j++) {
1082       xb = x + 7*(*idx++);
1083       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1084       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1085       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1086       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1087       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1088       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1089       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1090       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1091       v += 49;
1092     }
1093     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1094     if (!usecprow){
1095       z += 7; y += 7;
1096     }
1097   }
1098   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1099   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
1100   if (zz != yy) {
1101     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1102   }
1103   ierr = PetscLogFlops(98.0*a->nz);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatMultAdd_SeqBAIJ_N"
1109 PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1110 {
1111   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1112   PetscScalar    *x,*z = 0,*xb,*work,*workt,*zarray;
1113   MatScalar      *v;
1114   PetscErrorCode ierr;
1115   PetscInt       mbs,i,*idx,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2;
1116   PetscInt       ncols,k,*ridx=PETSC_NULL;
1117   PetscTruth     usecprow=a->compressedrow.use;
1118 
1119   PetscFunctionBegin;
1120   ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr);
1121   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1122   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
1123 
1124   idx = a->j;
1125   v   = a->a;
1126   if (usecprow){
1127     mbs    = a->compressedrow.nrows;
1128     ii     = a->compressedrow.i;
1129     ridx = a->compressedrow.rindex;
1130   } else {
1131     mbs = a->mbs;
1132     ii  = a->i;
1133     z   = zarray;
1134   }
1135 
1136   if (!a->mult_work) {
1137     k    = PetscMax(A->rmap->n,A->cmap->n);
1138     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1139   }
1140   work = a->mult_work;
1141   for (i=0; i<mbs; i++) {
1142     n     = ii[1] - ii[0]; ii++;
1143     ncols = n*bs;
1144     workt = work;
1145     for (j=0; j<n; j++) {
1146       xb = x + bs*(*idx++);
1147       for (k=0; k<bs; k++) workt[k] = xb[k];
1148       workt += bs;
1149     }
1150     if (usecprow) z = zarray + bs*ridx[i];
1151     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
1152     /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */
1153     v += n*bs2;
1154     if (!usecprow){
1155       z += bs;
1156     }
1157   }
1158   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1159   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1160   ierr = PetscLogFlops(2.0*a->nz*bs2);CHKERRQ(ierr);
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 #undef __FUNCT__
1165 #define __FUNCT__ "MatMultTranspose_SeqBAIJ"
1166 PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz)
1167 {
1168   PetscScalar    zero = 0.0;
1169   PetscErrorCode ierr;
1170 
1171   PetscFunctionBegin;
1172   ierr = VecSet(zz,zero);CHKERRQ(ierr);
1173   ierr = MatMultTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr);
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ"
1179 PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1180 
1181 {
1182   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1183   PetscScalar       *zb,*x,*z,*xb = 0,x1,x2,x3,x4,x5;
1184   MatScalar         *v;
1185   PetscErrorCode    ierr;
1186   PetscInt          mbs,i,*idx,*ii,rval,bs=A->rmap->bs,j,n,bs2=a->bs2,*ib,*ridx=PETSC_NULL;
1187   Mat_CompressedRow cprow = a->compressedrow;
1188   PetscTruth        usecprow=cprow.use;
1189 
1190   PetscFunctionBegin;
1191   if (yy != zz) { ierr = VecCopy_Seq(yy,zz);CHKERRQ(ierr); }
1192   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1193   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
1194 
1195   idx = a->j;
1196   v   = a->a;
1197   if (usecprow){
1198     mbs  = cprow.nrows;
1199     ii   = cprow.i;
1200     ridx = cprow.rindex;
1201   } else {
1202     mbs=a->mbs;
1203     ii = a->i;
1204     xb = x;
1205   }
1206 
1207   switch (bs) {
1208   case 1:
1209     for (i=0; i<mbs; i++) {
1210       if (usecprow) xb = x + ridx[i];
1211       x1 = xb[0];
1212       ib = idx + ii[0];
1213       n  = ii[1] - ii[0]; ii++;
1214       for (j=0; j<n; j++) {
1215         rval    = ib[j];
1216         z[rval] += *v * x1;
1217         v++;
1218       }
1219       if (!usecprow) xb++;
1220     }
1221     break;
1222   case 2:
1223     for (i=0; i<mbs; i++) {
1224       if (usecprow) xb = x + 2*ridx[i];
1225       x1 = xb[0]; x2 = xb[1];
1226       ib = idx + ii[0];
1227       n  = ii[1] - ii[0]; ii++;
1228       for (j=0; j<n; j++) {
1229         rval      = ib[j]*2;
1230         z[rval++] += v[0]*x1 + v[1]*x2;
1231         z[rval++] += v[2]*x1 + v[3]*x2;
1232         v  += 4;
1233       }
1234       if (!usecprow) xb += 2;
1235     }
1236     break;
1237   case 3:
1238     for (i=0; i<mbs; i++) {
1239       if (usecprow) xb = x + 3*ridx[i];
1240       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1241       ib = idx + ii[0];
1242       n  = ii[1] - ii[0]; ii++;
1243       for (j=0; j<n; j++) {
1244         rval      = ib[j]*3;
1245         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1246         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1247         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1248         v  += 9;
1249       }
1250       if (!usecprow) xb += 3;
1251     }
1252     break;
1253   case 4:
1254     for (i=0; i<mbs; i++) {
1255       if (usecprow) xb = x + 4*ridx[i];
1256       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1257       ib = idx + ii[0];
1258       n  = ii[1] - ii[0]; ii++;
1259       for (j=0; j<n; j++) {
1260         rval      = ib[j]*4;
1261         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1262         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1263         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1264         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1265         v  += 16;
1266       }
1267       if (!usecprow) xb += 4;
1268     }
1269     break;
1270   case 5:
1271     for (i=0; i<mbs; i++) {
1272       if (usecprow) xb = x + 5*ridx[i];
1273       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1274       x4 = xb[3]; x5 = xb[4];
1275       ib = idx + ii[0];
1276       n  = ii[1] - ii[0]; ii++;
1277       for (j=0; j<n; j++) {
1278         rval      = ib[j]*5;
1279         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1280         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1281         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1282         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1283         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1284         v  += 25;
1285       }
1286       if (!usecprow) xb += 5;
1287     }
1288     break;
1289   default: {      /* block sizes larger then 5 by 5 are handled by BLAS */
1290       PetscInt     ncols,k;
1291       PetscScalar  *work,*workt,*xtmp;
1292 
1293       if (!a->mult_work) {
1294         k = PetscMax(A->rmap->n,A->cmap->n);
1295         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1296       }
1297       work = a->mult_work;
1298       xtmp = x;
1299       for (i=0; i<mbs; i++) {
1300         n     = ii[1] - ii[0]; ii++;
1301         ncols = n*bs;
1302         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1303         if (usecprow) {
1304           xtmp = x + bs*ridx[i];
1305         }
1306         Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work);
1307         /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */
1308         v += n*bs2;
1309         if (!usecprow) xtmp += bs;
1310         workt = work;
1311         for (j=0; j<n; j++) {
1312           zb = z + bs*(*idx++);
1313           for (k=0; k<bs; k++) zb[k] += workt[k] ;
1314           workt += bs;
1315         }
1316       }
1317     }
1318   }
1319   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1320   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
1321   ierr = PetscLogFlops(2.0*a->nz*a->bs2);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatScale_SeqBAIJ"
1327 PetscErrorCode MatScale_SeqBAIJ(Mat inA,PetscScalar alpha)
1328 {
1329   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1330   PetscInt       totalnz = a->bs2*a->nz;
1331   PetscScalar    oalpha = alpha;
1332   PetscErrorCode ierr;
1333   PetscBLASInt   one = 1,tnz = PetscBLASIntCast(totalnz);
1334 
1335   PetscFunctionBegin;
1336   BLASscal_(&tnz,&oalpha,a->a,&one);
1337   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 #undef __FUNCT__
1342 #define __FUNCT__ "MatNorm_SeqBAIJ"
1343 PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm)
1344 {
1345   PetscErrorCode ierr;
1346   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1347   MatScalar      *v = a->a;
1348   PetscReal      sum = 0.0;
1349   PetscInt       i,j,k,bs=A->rmap->bs,nz=a->nz,bs2=a->bs2,k1;
1350 
1351   PetscFunctionBegin;
1352   if (type == NORM_FROBENIUS) {
1353     for (i=0; i< bs2*nz; i++) {
1354 #if defined(PETSC_USE_COMPLEX)
1355       sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1356 #else
1357       sum += (*v)*(*v); v++;
1358 #endif
1359     }
1360     *norm = sqrt(sum);
1361   } else if (type == NORM_1) { /* maximum column sum */
1362     PetscReal *tmp;
1363     PetscInt  *bcol = a->j;
1364     ierr = PetscMalloc((A->cmap->n+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1365     ierr = PetscMemzero(tmp,A->cmap->n*sizeof(PetscReal));CHKERRQ(ierr);
1366     for (i=0; i<nz; i++){
1367       for (j=0; j<bs; j++){
1368         k1 = bs*(*bcol) + j; /* column index */
1369         for (k=0; k<bs; k++){
1370           tmp[k1] += PetscAbsScalar(*v); v++;
1371         }
1372       }
1373       bcol++;
1374     }
1375     *norm = 0.0;
1376     for (j=0; j<A->cmap->n; j++) {
1377       if (tmp[j] > *norm) *norm = tmp[j];
1378     }
1379     ierr = PetscFree(tmp);CHKERRQ(ierr);
1380   } else if (type == NORM_INFINITY) { /* maximum row sum */
1381     *norm = 0.0;
1382     for (k=0; k<bs; k++) {
1383       for (j=0; j<a->mbs; j++) {
1384         v = a->a + bs2*a->i[j] + k;
1385         sum = 0.0;
1386         for (i=0; i<a->i[j+1]-a->i[j]; i++) {
1387           for (k1=0; k1<bs; k1++){
1388             sum += PetscAbsScalar(*v);
1389             v   += bs;
1390           }
1391         }
1392         if (sum > *norm) *norm = sum;
1393       }
1394     }
1395   } else {
1396     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1397   }
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 
1402 #undef __FUNCT__
1403 #define __FUNCT__ "MatEqual_SeqBAIJ"
1404 PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscTruth* flg)
1405 {
1406   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data;
1407   PetscErrorCode ierr;
1408 
1409   PetscFunctionBegin;
1410   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1411   if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1412     *flg = PETSC_FALSE;
1413     PetscFunctionReturn(0);
1414   }
1415 
1416   /* if the a->i are the same */
1417   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1418   if (!*flg) {
1419     PetscFunctionReturn(0);
1420   }
1421 
1422   /* if a->j are the same */
1423   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1424   if (!*flg) {
1425     PetscFunctionReturn(0);
1426   }
1427   /* if a->a are the same */
1428   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(B->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1429   PetscFunctionReturn(0);
1430 
1431 }
1432 
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatGetDiagonal_SeqBAIJ"
1435 PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1436 {
1437   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1438   PetscErrorCode ierr;
1439   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1440   PetscScalar    *x,zero = 0.0;
1441   MatScalar      *aa,*aa_j;
1442 
1443   PetscFunctionBegin;
1444   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1445   bs   = A->rmap->bs;
1446   aa   = a->a;
1447   ai   = a->i;
1448   aj   = a->j;
1449   ambs = a->mbs;
1450   bs2  = a->bs2;
1451 
1452   ierr = VecSet(v,zero);CHKERRQ(ierr);
1453   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1454   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1455   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1456   for (i=0; i<ambs; i++) {
1457     for (j=ai[i]; j<ai[i+1]; j++) {
1458       if (aj[j] == i) {
1459         row  = i*bs;
1460         aa_j = aa+j*bs2;
1461         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1462         break;
1463       }
1464     }
1465   }
1466   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1467   PetscFunctionReturn(0);
1468 }
1469 
1470 #undef __FUNCT__
1471 #define __FUNCT__ "MatDiagonalScale_SeqBAIJ"
1472 PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
1473 {
1474   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1475   PetscScalar    *l,*r,x,*li,*ri;
1476   MatScalar      *aa,*v;
1477   PetscErrorCode ierr;
1478   PetscInt       i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2;
1479 
1480   PetscFunctionBegin;
1481   ai  = a->i;
1482   aj  = a->j;
1483   aa  = a->a;
1484   m   = A->rmap->n;
1485   n   = A->cmap->n;
1486   bs  = A->rmap->bs;
1487   mbs = a->mbs;
1488   bs2 = a->bs2;
1489   if (ll) {
1490     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
1491     ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
1492     if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1493     for (i=0; i<mbs; i++) { /* for each block row */
1494       M  = ai[i+1] - ai[i];
1495       li = l + i*bs;
1496       v  = aa + bs2*ai[i];
1497       for (j=0; j<M; j++) { /* for each block */
1498         for (k=0; k<bs2; k++) {
1499           (*v++) *= li[k%bs];
1500         }
1501       }
1502     }
1503     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
1504     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1505   }
1506 
1507   if (rr) {
1508     ierr = VecGetArray(rr,&r);CHKERRQ(ierr);
1509     ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr);
1510     if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
1511     for (i=0; i<mbs; i++) { /* for each block row */
1512       M  = ai[i+1] - ai[i];
1513       v  = aa + bs2*ai[i];
1514       for (j=0; j<M; j++) { /* for each block */
1515         ri = r + bs*aj[ai[i]+j];
1516         for (k=0; k<bs; k++) {
1517           x = ri[k];
1518           for (tmp=0; tmp<bs; tmp++) (*v++) *= x;
1519         }
1520       }
1521     }
1522     ierr = VecRestoreArray(rr,&r);CHKERRQ(ierr);
1523     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
1524   }
1525   PetscFunctionReturn(0);
1526 }
1527 
1528 
1529 #undef __FUNCT__
1530 #define __FUNCT__ "MatGetInfo_SeqBAIJ"
1531 PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1532 {
1533   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1534 
1535   PetscFunctionBegin;
1536   info->block_size     = a->bs2;
1537   info->nz_allocated   = a->maxnz;
1538   info->nz_used        = a->bs2*a->nz;
1539   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1540   info->assemblies   = A->num_ass;
1541   info->mallocs      = a->reallocs;
1542   info->memory       = ((PetscObject)A)->mem;
1543   if (A->factor) {
1544     info->fill_ratio_given  = A->info.fill_ratio_given;
1545     info->fill_ratio_needed = A->info.fill_ratio_needed;
1546     info->factor_mallocs    = A->info.factor_mallocs;
1547   } else {
1548     info->fill_ratio_given  = 0;
1549     info->fill_ratio_needed = 0;
1550     info->factor_mallocs    = 0;
1551   }
1552   PetscFunctionReturn(0);
1553 }
1554 
1555 
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatZeroEntries_SeqBAIJ"
1558 PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A)
1559 {
1560   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1561   PetscErrorCode ierr;
1562 
1563   PetscFunctionBegin;
1564   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
1565   PetscFunctionReturn(0);
1566 }
1567