xref: /petsc/src/mat/impls/baij/seq/baij2.c (revision 2205254efee3a00a594e5e2a3a70f74dcb40bc03)
1 
2 #include <../src/mat/impls/baij/seq/baij.h>
3 #include <../src/mat/blockinvert.h>
4 #include <petscbt.h>
5 #include <petscblaslapack.h>
6 
7 #undef __FUNCT__
8 #define __FUNCT__ "MatIncreaseOverlap_SeqBAIJ"
9 PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
10 {
11   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
12   PetscErrorCode ierr;
13   PetscInt       row,i,j,k,l,m,n,*nidx,isz,val,ival;
14   const PetscInt *idx;
15   PetscInt       start,end,*ai,*aj,bs,*nidx2;
16   PetscBT        table;
17 
18   PetscFunctionBegin;
19   m     = a->mbs;
20   ai    = a->i;
21   aj    = a->j;
22   bs    = A->rmap->bs;
23 
24   if (ov < 0)  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
25 
26   ierr = PetscBTCreate(m,&table);CHKERRQ(ierr);
27   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nidx);CHKERRQ(ierr);
28   ierr = PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);CHKERRQ(ierr);
29 
30   for (i=0; i<is_max; i++) {
31     /* Initialise the two local arrays */
32     isz  = 0;
33     ierr = PetscBTMemzero(m,table);CHKERRQ(ierr);
34 
35     /* Extract the indices, assume there can be duplicate entries */
36     ierr = ISGetIndices(is[i],&idx);CHKERRQ(ierr);
37     ierr = ISGetLocalSize(is[i],&n);CHKERRQ(ierr);
38 
39     /* Enter these into the temp arrays i.e mark table[row], enter row into new index */
40     for (j=0; j<n ; ++j) {
41       ival = idx[j]/bs; /* convert the indices into block indices */
42       if (ival>=m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
43       if (!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;}
44     }
45     ierr = ISRestoreIndices(is[i],&idx);CHKERRQ(ierr);
46     ierr = ISDestroy(&is[i]);CHKERRQ(ierr);
47 
48     k = 0;
49     for (j=0; j<ov; j++) { /* for each overlap*/
50       n = isz;
51       for (; k<n ; k++) { /* do only those rows in nidx[k], which are not done yet */
52         row   = nidx[k];
53         start = ai[row];
54         end   = ai[row+1];
55         for (l = start; l<end ; l++) {
56           val = aj[l];
57           if (!PetscBTLookupSet(table,val)) {nidx[isz++] = val;}
58         }
59       }
60     }
61     /* expand the Index Set */
62     for (j=0; j<isz; j++) {
63       for (k=0; k<bs; k++)
64         nidx2[j*bs+k] = nidx[j]*bs+k;
65     }
66     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,PETSC_COPY_VALUES,is+i);CHKERRQ(ierr);
67   }
68   ierr = PetscBTDestroy(&table);CHKERRQ(ierr);
69   ierr = PetscFree(nidx);CHKERRQ(ierr);
70   ierr = PetscFree(nidx2);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ_Private"
76 PetscErrorCode MatGetSubMatrix_SeqBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
77 {
78   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*c;
79   PetscErrorCode ierr;
80   PetscInt       *smap,i,k,kstart,kend,oldcols = a->nbs,*lens;
81   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
82   const PetscInt *irow,*icol;
83   PetscInt       nrows,ncols,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
84   PetscInt       *aj = a->j,*ai = a->i;
85   MatScalar      *mat_a;
86   Mat            C;
87   PetscBool      flag,sorted;
88 
89   PetscFunctionBegin;
90   ierr = ISSorted(iscol,&sorted);CHKERRQ(ierr);
91   if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");
92 
93   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
94   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
95   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
96   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
97 
98   ierr = PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);CHKERRQ(ierr);
99   ssmap = smap;
100   ierr = PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
101   ierr  = PetscMemzero(smap,oldcols*sizeof(PetscInt));CHKERRQ(ierr);
102   for (i=0; i<ncols; i++) smap[icol[i]] = i+1;
103   /* determine lens of each row */
104   for (i=0; i<nrows; i++) {
105     kstart  = ai[irow[i]];
106     kend    = kstart + a->ilen[irow[i]];
107     lens[i] = 0;
108       for (k=kstart; k<kend; k++) {
109         if (ssmap[aj[k]]) {
110           lens[i]++;
111         }
112       }
113     }
114   /* Create and fill new matrix */
115   if (scall == MAT_REUSE_MATRIX) {
116     c = (Mat_SeqBAIJ *)((*B)->data);
117 
118     if (c->mbs!=nrows || c->nbs!=ncols || (*B)->rmap->bs!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
119     ierr = PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr);
120     if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
121     ierr = PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));CHKERRQ(ierr);
122     C = *B;
123   } else {
124     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
125     ierr = MatSetSizes(C,nrows*bs,ncols*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
126     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
127     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,lens);CHKERRQ(ierr);
128   }
129   c = (Mat_SeqBAIJ *)(C->data);
130   for (i=0; i<nrows; i++) {
131     row    = irow[i];
132     kstart = ai[row];
133     kend   = kstart + a->ilen[row];
134     mat_i  = c->i[i];
135     mat_j  = c->j + mat_i;
136     mat_a  = c->a + mat_i*bs2;
137     mat_ilen = c->ilen + i;
138     for (k=kstart; k<kend; k++) {
139       if ((tcol=ssmap[a->j[k]])) {
140         *mat_j++ = tcol - 1;
141         ierr     = PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
142         mat_a   += bs2;
143         (*mat_ilen)++;
144       }
145     }
146   }
147 
148   /* Free work space */
149   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
150   ierr = PetscFree(smap);CHKERRQ(ierr);
151   ierr = PetscFree(lens);CHKERRQ(ierr);
152   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
153   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
154 
155   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
156   *B = C;
157   PetscFunctionReturn(0);
158 }
159 
160 #undef __FUNCT__
161 #define __FUNCT__ "MatGetSubMatrix_SeqBAIJ"
162 PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
163 {
164   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
165   IS             is1,is2;
166   PetscErrorCode ierr;
167   PetscInt       *vary,*iary,nrows,ncols,i,bs=A->rmap->bs,count;
168   const PetscInt *irow,*icol;
169 
170   PetscFunctionBegin;
171   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
172   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
173   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
174   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
175 
176   /* Verify if the indices corespond to each element in a block
177    and form the IS with compressed IS */
178   ierr = PetscMalloc2(a->mbs,PetscInt,&vary,a->mbs,PetscInt,&iary);CHKERRQ(ierr);
179   ierr = PetscMemzero(vary,a->mbs*sizeof(PetscInt));CHKERRQ(ierr);
180   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
181   count = 0;
182   for (i=0; i<a->mbs; i++) {
183     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Index set does not match blocks");
184     if (vary[i]==bs) iary[count++] = i;
185   }
186   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,PETSC_COPY_VALUES,&is1);CHKERRQ(ierr);
187 
188   ierr = PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));CHKERRQ(ierr);
189   for (i=0; i<ncols; i++) vary[icol[i]/bs]++;
190   count = 0;
191   for (i=0; i<a->mbs; i++) {
192     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal error in PETSc");
193     if (vary[i]==bs) iary[count++] = i;
194   }
195   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,iary,PETSC_COPY_VALUES,&is2);CHKERRQ(ierr);
196   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
197   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
198   ierr = PetscFree2(vary,iary);CHKERRQ(ierr);
199 
200   ierr = MatGetSubMatrix_SeqBAIJ_Private(A,is1,is2,scall,B);CHKERRQ(ierr);
201   ierr = ISDestroy(&is1);CHKERRQ(ierr);
202   ierr = ISDestroy(&is2);CHKERRQ(ierr);
203   PetscFunctionReturn(0);
204 }
205 
206 #undef __FUNCT__
207 #define __FUNCT__ "MatGetSubMatrices_SeqBAIJ"
208 PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
209 {
210   PetscErrorCode ierr;
211   PetscInt       i;
212 
213   PetscFunctionBegin;
214   if (scall == MAT_INITIAL_MATRIX) {
215     ierr = PetscMalloc((n+1)*sizeof(Mat),B);CHKERRQ(ierr);
216   }
217 
218   for (i=0; i<n; i++) {
219     ierr = MatGetSubMatrix_SeqBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);CHKERRQ(ierr);
220   }
221   PetscFunctionReturn(0);
222 }
223 
224 
225 /* -------------------------------------------------------*/
226 /* Should check that shapes of vectors and matrices match */
227 /* -------------------------------------------------------*/
228 
229 #undef __FUNCT__
230 #define __FUNCT__ "MatMult_SeqBAIJ_1"
231 PetscErrorCode MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz)
232 {
233   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
234   PetscScalar       *z,sum;
235   const PetscScalar *x;
236   const MatScalar   *v;
237   PetscErrorCode    ierr;
238   PetscInt          mbs,i,n,nonzerorow=0;
239   const PetscInt    *idx,*ii,*ridx=PETSC_NULL;
240   PetscBool         usecprow=a->compressedrow.use;
241 
242   PetscFunctionBegin;
243   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
244   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
245 
246   if (usecprow) {
247     mbs  = a->compressedrow.nrows;
248     ii   = a->compressedrow.i;
249     ridx = a->compressedrow.rindex;
250     ierr = PetscMemzero(z,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
251   } else {
252     mbs = a->mbs;
253     ii  = a->i;
254   }
255 
256   for (i=0; i<mbs; i++) {
257     n    = ii[1] - ii[0];
258     v    = a->a + ii[0];
259     idx  = a->j + ii[0];
260     ii++;
261     PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA);   /* Indices for the next row (assumes same size as this one) */
262     PetscPrefetchBlock(v+1*n,1*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
263     sum  = 0.0;
264     PetscSparseDensePlusDot(sum,x,v,idx,n);
265     if (usecprow) {
266       z[ridx[i]] = sum;
267     } else {
268       nonzerorow += (n>0);
269       z[i] = sum;
270     }
271   }
272   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
273   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
274   ierr = PetscLogFlops(2.0*a->nz - nonzerorow);CHKERRQ(ierr);
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatMult_SeqBAIJ_2"
280 PetscErrorCode MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz)
281 {
282   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
283   PetscScalar       *z = 0,sum1,sum2,*zarray;
284   const PetscScalar *x,*xb;
285   PetscScalar       x1,x2;
286   const MatScalar   *v;
287   PetscErrorCode    ierr;
288   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
289   PetscBool         usecprow=a->compressedrow.use;
290 
291   PetscFunctionBegin;
292   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
293   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
294 
295   idx = a->j;
296   v   = a->a;
297   if (usecprow) {
298     mbs  = a->compressedrow.nrows;
299     ii   = a->compressedrow.i;
300     ridx = a->compressedrow.rindex;
301   } else {
302     mbs = a->mbs;
303     ii  = a->i;
304     z   = zarray;
305   }
306 
307   for (i=0; i<mbs; i++) {
308     n  = ii[1] - ii[0]; ii++;
309     sum1 = 0.0; sum2 = 0.0;
310     nonzerorow += (n>0);
311     PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA);   /* Indices for the next row (assumes same size as this one) */
312     PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
313     for (j=0; j<n; j++) {
314       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
315       sum1 += v[0]*x1 + v[2]*x2;
316       sum2 += v[1]*x1 + v[3]*x2;
317       v += 4;
318     }
319     if (usecprow) z = zarray + 2*ridx[i];
320     z[0] = sum1; z[1] = sum2;
321     if (!usecprow) z += 2;
322   }
323   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
324   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
325   ierr = PetscLogFlops(8.0*a->nz - 2.0*nonzerorow);CHKERRQ(ierr);
326   PetscFunctionReturn(0);
327 }
328 
329 #undef __FUNCT__
330 #define __FUNCT__ "MatMult_SeqBAIJ_3"
331 PetscErrorCode MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz)
332 {
333   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
334   PetscScalar       *z = 0,sum1,sum2,sum3,x1,x2,x3,*zarray;
335   const PetscScalar *x,*xb;
336   const MatScalar   *v;
337   PetscErrorCode    ierr;
338   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
339   PetscBool         usecprow=a->compressedrow.use;
340 
341 
342 #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
343 #pragma disjoint(*v,*z,*xb)
344 #endif
345 
346   PetscFunctionBegin;
347   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
348   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
349 
350   idx = a->j;
351   v   = a->a;
352   if (usecprow) {
353     mbs  = a->compressedrow.nrows;
354     ii   = a->compressedrow.i;
355     ridx = a->compressedrow.rindex;
356   } else {
357     mbs = a->mbs;
358     ii  = a->i;
359     z   = zarray;
360   }
361 
362   for (i=0; i<mbs; i++) {
363     n  = ii[1] - ii[0]; ii++;
364     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0;
365     nonzerorow += (n>0);
366     PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA);   /* Indices for the next row (assumes same size as this one) */
367     PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
368     for (j=0; j<n; j++) {
369       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
370       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
371       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
372       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
373       v += 9;
374     }
375     if (usecprow) z = zarray + 3*ridx[i];
376     z[0] = sum1; z[1] = sum2; z[2] = sum3;
377     if (!usecprow) z += 3;
378   }
379   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
380   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
381   ierr = PetscLogFlops(18.0*a->nz - 3.0*nonzerorow);CHKERRQ(ierr);
382   PetscFunctionReturn(0);
383 }
384 
385 #undef __FUNCT__
386 #define __FUNCT__ "MatMult_SeqBAIJ_4"
387 PetscErrorCode MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz)
388 {
389   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
390   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray;
391   const PetscScalar *x,*xb;
392   const MatScalar   *v;
393   PetscErrorCode    ierr;
394   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
395   PetscBool         usecprow=a->compressedrow.use;
396 
397   PetscFunctionBegin;
398   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
399   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
400 
401   idx = a->j;
402   v   = a->a;
403   if (usecprow) {
404     mbs  = a->compressedrow.nrows;
405     ii   = a->compressedrow.i;
406     ridx = a->compressedrow.rindex;
407   } else {
408     mbs = a->mbs;
409     ii  = a->i;
410     z   = zarray;
411   }
412 
413   for (i=0; i<mbs; i++) {
414     n  = ii[1] - ii[0]; ii++;
415     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;
416     nonzerorow += (n>0);
417     PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA);     /* Indices for the next row (assumes same size as this one) */
418     PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
419     for (j=0; j<n; j++) {
420       xb = x + 4*(*idx++);
421       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
422       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
423       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
424       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
425       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
426       v += 16;
427     }
428     if (usecprow) z = zarray + 4*ridx[i];
429     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
430     if (!usecprow) z += 4;
431   }
432   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
433   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
434   ierr = PetscLogFlops(32.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
435   PetscFunctionReturn(0);
436 }
437 
438 #undef __FUNCT__
439 #define __FUNCT__ "MatMult_SeqBAIJ_5"
440 PetscErrorCode MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz)
441 {
442   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
443   PetscScalar       sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5,*z = 0,*zarray;
444   const PetscScalar *xb,*x;
445   const MatScalar   *v;
446   PetscErrorCode    ierr;
447   const PetscInt    *idx,*ii,*ridx=PETSC_NULL;
448   PetscInt          mbs,i,j,n,nonzerorow=0;
449   PetscBool         usecprow=a->compressedrow.use;
450 
451   PetscFunctionBegin;
452   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
453   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
454 
455   idx = a->j;
456   v   = a->a;
457   if (usecprow) {
458     mbs  = a->compressedrow.nrows;
459     ii   = a->compressedrow.i;
460     ridx = a->compressedrow.rindex;
461   } else {
462     mbs = a->mbs;
463     ii  = a->i;
464     z   = zarray;
465   }
466 
467   for (i=0; i<mbs; i++) {
468     n  = ii[1] - ii[0]; ii++;
469     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0;
470     nonzerorow += (n>0);
471     PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA);     /* Indices for the next row (assumes same size as this one) */
472     PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
473     for (j=0; j<n; j++) {
474       xb = x + 5*(*idx++);
475       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
476       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
477       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
478       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
479       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
480       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
481       v += 25;
482     }
483     if (usecprow) z = zarray + 5*ridx[i];
484     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
485     if (!usecprow) z += 5;
486   }
487   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
488   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
489   ierr = PetscLogFlops(50.0*a->nz - 5.0*nonzerorow);CHKERRQ(ierr);
490   PetscFunctionReturn(0);
491 }
492 
493 
494 #undef __FUNCT__
495 #define __FUNCT__ "MatMult_SeqBAIJ_6"
496 PetscErrorCode MatMult_SeqBAIJ_6(Mat A,Vec xx,Vec zz)
497 {
498   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
499   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,sum5,sum6;
500   const PetscScalar *x,*xb;
501   PetscScalar       x1,x2,x3,x4,x5,x6,*zarray;
502   const MatScalar   *v;
503   PetscErrorCode    ierr;
504   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
505   PetscBool         usecprow=a->compressedrow.use;
506 
507   PetscFunctionBegin;
508   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
509   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
510 
511   idx = a->j;
512   v   = a->a;
513   if (usecprow) {
514     mbs  = a->compressedrow.nrows;
515     ii   = a->compressedrow.i;
516     ridx = a->compressedrow.rindex;
517   } else {
518     mbs = a->mbs;
519     ii  = a->i;
520     z   = zarray;
521   }
522 
523   for (i=0; i<mbs; i++) {
524     n  = ii[1] - ii[0]; ii++;
525     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0;
526     nonzerorow += (n>0);
527     PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA);     /* Indices for the next row (assumes same size as this one) */
528     PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
529     for (j=0; j<n; j++) {
530       xb = x + 6*(*idx++);
531       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
532       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
533       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
534       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
535       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
536       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
537       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
538       v += 36;
539     }
540     if (usecprow) z = zarray + 6*ridx[i];
541     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
542     if (!usecprow) z += 6;
543   }
544 
545   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
546   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
547   ierr = PetscLogFlops(72.0*a->nz - 6.0*nonzerorow);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatMult_SeqBAIJ_7"
553 PetscErrorCode MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz)
554 {
555   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
556   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
557   const PetscScalar *x,*xb;
558   PetscScalar       x1,x2,x3,x4,x5,x6,x7,*zarray;
559   const MatScalar   *v;
560   PetscErrorCode    ierr;
561   PetscInt          mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL,nonzerorow=0;
562   PetscBool         usecprow=a->compressedrow.use;
563 
564   PetscFunctionBegin;
565   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
566   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
567 
568   idx = a->j;
569   v   = a->a;
570   if (usecprow) {
571     mbs    = a->compressedrow.nrows;
572     ii     = a->compressedrow.i;
573     ridx = a->compressedrow.rindex;
574   } else {
575     mbs = a->mbs;
576     ii  = a->i;
577     z   = zarray;
578   }
579 
580   for (i=0; i<mbs; i++) {
581     n  = ii[1] - ii[0]; ii++;
582     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
583     nonzerorow += (n>0);
584     PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA);     /* Indices for the next row (assumes same size as this one) */
585     PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
586     for (j=0; j<n; j++) {
587       xb = x + 7*(*idx++);
588       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
589       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
590       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
591       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
592       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
593       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
594       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
595       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
596       v += 49;
597     }
598     if (usecprow) z = zarray + 7*ridx[i];
599     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
600     if (!usecprow) z += 7;
601   }
602 
603   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
604   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
605   ierr = PetscLogFlops(98.0*a->nz - 7.0*nonzerorow);CHKERRQ(ierr);
606   PetscFunctionReturn(0);
607 }
608 
609 /* MatMult_SeqBAIJ_15 version 1: Columns in the block are accessed one at a time */
610 /* Default MatMult for block size 15 */
611 
612 #undef __FUNCT__
613 #define __FUNCT__ "MatMult_SeqBAIJ_15_ver1"
614 PetscErrorCode MatMult_SeqBAIJ_15_ver1(Mat A,Vec xx,Vec zz)
615 {
616   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
617   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11,sum12,sum13,sum14,sum15;
618   const PetscScalar *x,*xb;
619   PetscScalar       *zarray,xv;
620   const MatScalar   *v;
621   PetscErrorCode    ierr;
622   const PetscInt    *ii,*ij=a->j,*idx;
623   PetscInt          mbs,i,j,k,n,*ridx=PETSC_NULL,nonzerorow=0;
624   PetscBool         usecprow=a->compressedrow.use;
625 
626   PetscFunctionBegin;
627   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
628   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
629 
630   v   = a->a;
631   if (usecprow) {
632     mbs    = a->compressedrow.nrows;
633     ii     = a->compressedrow.i;
634     ridx = a->compressedrow.rindex;
635   } else {
636     mbs = a->mbs;
637     ii  = a->i;
638     z   = zarray;
639   }
640 
641   for (i=0; i<mbs; i++) {
642     n  = ii[i+1] - ii[i];
643     idx = ij + ii[i];
644     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
645     sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; sum11 = 0.0; sum12 = 0.0; sum13 = 0.0; sum14 = 0.0;sum15 = 0.0;
646 
647     nonzerorow += (n>0);
648     for (j=0; j<n; j++) {
649       xb = x + 15*(idx[j]);
650 
651       for (k=0;k<15;k++) {
652         xv    =  xb[k];
653         sum1  += v[0]*xv;
654         sum2  += v[1]*xv;
655         sum3  += v[2]*xv;
656         sum4  += v[3]*xv;
657         sum5  += v[4]*xv;
658         sum6  += v[5]*xv;
659         sum7  += v[6]*xv;
660         sum8  += v[7]*xv;
661         sum9  += v[8]*xv;
662         sum10 += v[9]*xv;
663         sum11 += v[10]*xv;
664         sum12 += v[11]*xv;
665         sum13 += v[12]*xv;
666         sum14 += v[13]*xv;
667         sum15 += v[14]*xv;
668         v += 15;
669       }
670     }
671     if (usecprow) z = zarray + 15*ridx[i];
672     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
673     z[7] = sum8; z[8] = sum9; z[9] = sum10; z[10] = sum11; z[11] = sum12; z[12] = sum13; z[13] = sum14;z[14] = sum15;
674 
675     if (!usecprow) z += 15;
676   }
677 
678   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
679   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
680   ierr = PetscLogFlops(450.0*a->nz - 15.0*nonzerorow);CHKERRQ(ierr);
681   PetscFunctionReturn(0);
682 }
683 
684 /* MatMult_SeqBAIJ_15_ver2 : Columns in the block are accessed in sets of 4,4,4,3 */
685 #undef __FUNCT__
686 #define __FUNCT__ "MatMult_SeqBAIJ_15_ver2"
687 PetscErrorCode MatMult_SeqBAIJ_15_ver2(Mat A,Vec xx,Vec zz)
688 {
689   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
690   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11,sum12,sum13,sum14,sum15;
691   const PetscScalar *x,*xb;
692   PetscScalar        x1,x2,x3,x4,*zarray;
693   const MatScalar   *v;
694   PetscErrorCode    ierr;
695   const PetscInt    *ii,*ij=a->j,*idx;
696   PetscInt          mbs,i,j,n,*ridx=PETSC_NULL,nonzerorow=0;
697   PetscBool         usecprow=a->compressedrow.use;
698 
699   PetscFunctionBegin;
700   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
701   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
702 
703   v   = a->a;
704   if (usecprow) {
705     mbs    = a->compressedrow.nrows;
706     ii     = a->compressedrow.i;
707     ridx = a->compressedrow.rindex;
708   } else {
709     mbs = a->mbs;
710     ii  = a->i;
711     z   = zarray;
712   }
713 
714   for (i=0; i<mbs; i++) {
715     n  = ii[i+1] - ii[i];
716     idx = ij + ii[i];
717     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
718     sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; sum11 = 0.0; sum12 = 0.0; sum13 = 0.0; sum14 = 0.0;sum15 = 0.0;
719 
720     nonzerorow += (n>0);
721     for (j=0; j<n; j++) {
722       xb = x + 15*(idx[j]);
723       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
724 
725       sum1  += v[0]*x1 + v[15]*x2 + v[30]*x3   + v[45]*x4;
726       sum2  += v[1]*x1 + v[16]*x2 + v[31]*x3   + v[46]*x4;
727       sum3  += v[2]*x1 + v[17]*x2 + v[32]*x3  + v[47]*x4;
728       sum4  += v[3]*x1 + v[18]*x2 + v[33]*x3  + v[48]*x4;
729       sum5  += v[4]*x1 + v[19]*x2 + v[34]*x3   + v[49]*x4;
730       sum6  += v[5]*x1 + v[20]*x2 + v[35]*x3   + v[50]*x4;
731       sum7  += v[6]*x1 + v[21]*x2 + v[36]*x3  + v[51]*x4;
732       sum8  += v[7]*x1 + v[22]*x2 + v[37]*x3  + v[52]*x4;
733       sum9  += v[8]*x1 + v[23]*x2 + v[38]*x3   + v[53]*x4;
734       sum10 += v[9]*x1 + v[24]*x2 + v[39]*x3   + v[54]*x4;
735       sum11 += v[10]*x1 + v[25]*x2 + v[40]*x3  + v[55]*x4;
736       sum12 += v[11]*x1 + v[26]*x2 + v[41]*x3  + v[56]*x4;
737       sum13 += v[12]*x1 + v[27]*x2 + v[42]*x3   + v[57]*x4;
738       sum14 += v[13]*x1 + v[28]*x2 + v[43]*x3   + v[58]*x4;
739       sum15 += v[14]*x1 + v[29]*x2 + v[44]*x3  + v[59]*x4;
740 
741       v += 60;
742 
743       x1 = xb[4]; x2 = xb[5]; x3 = xb[6]; x4 = xb[7];
744 
745       sum1  += v[0]*x1 + v[15]*x2 + v[30]*x3   + v[45]*x4;
746       sum2  += v[1]*x1 + v[16]*x2 + v[31]*x3   + v[46]*x4;
747       sum3  += v[2]*x1 + v[17]*x2 + v[32]*x3  + v[47]*x4;
748       sum4  += v[3]*x1 + v[18]*x2 + v[33]*x3  + v[48]*x4;
749       sum5  += v[4]*x1 + v[19]*x2 + v[34]*x3   + v[49]*x4;
750       sum6  += v[5]*x1 + v[20]*x2 + v[35]*x3   + v[50]*x4;
751       sum7  += v[6]*x1 + v[21]*x2 + v[36]*x3  + v[51]*x4;
752       sum8  += v[7]*x1 + v[22]*x2 + v[37]*x3  + v[52]*x4;
753       sum9  += v[8]*x1 + v[23]*x2 + v[38]*x3   + v[53]*x4;
754       sum10 += v[9]*x1 + v[24]*x2 + v[39]*x3   + v[54]*x4;
755       sum11 += v[10]*x1 + v[25]*x2 + v[40]*x3  + v[55]*x4;
756       sum12 += v[11]*x1 + v[26]*x2 + v[41]*x3  + v[56]*x4;
757       sum13 += v[12]*x1 + v[27]*x2 + v[42]*x3   + v[57]*x4;
758       sum14 += v[13]*x1 + v[28]*x2 + v[43]*x3   + v[58]*x4;
759       sum15 += v[14]*x1 + v[29]*x2 + v[44]*x3  + v[59]*x4;
760       v += 60;
761 
762       x1 = xb[8]; x2 = xb[9]; x3 = xb[10]; x4 = xb[11];
763       sum1  += v[0]*x1 + v[15]*x2 + v[30]*x3   + v[45]*x4;
764       sum2  += v[1]*x1 + v[16]*x2 + v[31]*x3   + v[46]*x4;
765       sum3  += v[2]*x1 + v[17]*x2 + v[32]*x3  + v[47]*x4;
766       sum4  += v[3]*x1 + v[18]*x2 + v[33]*x3  + v[48]*x4;
767       sum5  += v[4]*x1 + v[19]*x2 + v[34]*x3   + v[49]*x4;
768       sum6  += v[5]*x1 + v[20]*x2 + v[35]*x3   + v[50]*x4;
769       sum7  += v[6]*x1 + v[21]*x2 + v[36]*x3  + v[51]*x4;
770       sum8  += v[7]*x1 + v[22]*x2 + v[37]*x3  + v[52]*x4;
771       sum9  += v[8]*x1 + v[23]*x2 + v[38]*x3   + v[53]*x4;
772       sum10 += v[9]*x1 + v[24]*x2 + v[39]*x3   + v[54]*x4;
773       sum11 += v[10]*x1 + v[25]*x2 + v[40]*x3  + v[55]*x4;
774       sum12 += v[11]*x1 + v[26]*x2 + v[41]*x3  + v[56]*x4;
775       sum13 += v[12]*x1 + v[27]*x2 + v[42]*x3   + v[57]*x4;
776       sum14 += v[13]*x1 + v[28]*x2 + v[43]*x3   + v[58]*x4;
777       sum15 += v[14]*x1 + v[29]*x2 + v[44]*x3  + v[59]*x4;
778       v  += 60;
779 
780       x1 = xb[12]; x2 = xb[13]; x3 = xb[14];
781       sum1  += v[0]*x1 + v[15]*x2 + v[30]*x3;
782       sum2  += v[1]*x1 + v[16]*x2 + v[31]*x3;
783       sum3  += v[2]*x1 + v[17]*x2 + v[32]*x3;
784       sum4  += v[3]*x1 + v[18]*x2 + v[33]*x3;
785       sum5  += v[4]*x1 + v[19]*x2 + v[34]*x3;
786       sum6  += v[5]*x1 + v[20]*x2 + v[35]*x3;
787       sum7  += v[6]*x1 + v[21]*x2 + v[36]*x3;
788       sum8  += v[7]*x1 + v[22]*x2 + v[37]*x3;
789       sum9  += v[8]*x1 + v[23]*x2 + v[38]*x3;
790       sum10 += v[9]*x1 + v[24]*x2 + v[39]*x3;
791       sum11 += v[10]*x1 + v[25]*x2 + v[40]*x3;
792       sum12 += v[11]*x1 + v[26]*x2 + v[41]*x3;
793       sum13 += v[12]*x1 + v[27]*x2 + v[42]*x3;
794       sum14 += v[13]*x1 + v[28]*x2 + v[43]*x3;
795       sum15 += v[14]*x1 + v[29]*x2 + v[44]*x3;
796       v += 45;
797     }
798     if (usecprow) z = zarray + 15*ridx[i];
799     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
800     z[7] = sum8; z[8] = sum9; z[9] = sum10; z[10] = sum11; z[11] = sum12; z[12] = sum13; z[13] = sum14;z[14] = sum15;
801 
802     if (!usecprow) z += 15;
803   }
804 
805   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
806   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
807   ierr = PetscLogFlops(450.0*a->nz - 15.0*nonzerorow);CHKERRQ(ierr);
808   PetscFunctionReturn(0);
809 }
810 
811 /* MatMult_SeqBAIJ_15_ver3 : Columns in the block are accessed in sets of 8,7 */
812 #undef __FUNCT__
813 #define __FUNCT__ "MatMult_SeqBAIJ_15_ver3"
814 PetscErrorCode MatMult_SeqBAIJ_15_ver3(Mat A,Vec xx,Vec zz)
815 {
816   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
817   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11,sum12,sum13,sum14,sum15;
818   const PetscScalar *x,*xb;
819   PetscScalar        x1,x2,x3,x4,x5,x6,x7,x8,*zarray;
820   const MatScalar   *v;
821   PetscErrorCode    ierr;
822   const PetscInt    *ii,*ij=a->j,*idx;
823   PetscInt          mbs,i,j,n,*ridx=PETSC_NULL,nonzerorow=0;
824   PetscBool         usecprow=a->compressedrow.use;
825 
826   PetscFunctionBegin;
827   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
828   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
829 
830   v   = a->a;
831   if (usecprow) {
832     mbs    = a->compressedrow.nrows;
833     ii     = a->compressedrow.i;
834     ridx = a->compressedrow.rindex;
835   } else {
836     mbs = a->mbs;
837     ii  = a->i;
838     z   = zarray;
839   }
840 
841   for (i=0; i<mbs; i++) {
842     n  = ii[i+1] - ii[i];
843     idx = ij + ii[i];
844     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
845     sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; sum11 = 0.0; sum12 = 0.0; sum13 = 0.0; sum14 = 0.0;sum15 = 0.0;
846 
847     nonzerorow += (n>0);
848     for (j=0; j<n; j++) {
849       xb = x + 15*(idx[j]);
850       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
851       x8 = xb[7];
852 
853       sum1  += v[0]*x1 + v[15]*x2  + v[30]*x3  + v[45]*x4 + v[60]*x5 + v[75]*x6 + v[90]*x7 + v[105]*x8;
854       sum2  += v[1]*x1 + v[16]*x2  + v[31]*x3  + v[46]*x4 + v[61]*x5 + v[76]*x6 + v[91]*x7 + v[106]*x8;
855       sum3  += v[2]*x1 + v[17]*x2  + v[32]*x3  + v[47]*x4 + v[62]*x5 + v[77]*x6 + v[92]*x7 + v[107]*x8;
856       sum4  += v[3]*x1 + v[18]*x2 + v[33]*x3  + v[48]*x4 + v[63]*x5 + v[78]*x6 + v[93]*x7 + v[108]*x8;
857       sum5  += v[4]*x1 + v[19]*x2 + v[34]*x3  + v[49]*x4 + v[64]*x5 + v[79]*x6 + v[94]*x7 + v[109]*x8;
858       sum6  += v[5]*x1 + v[20]*x2 + v[35]*x3  + v[50]*x4 + v[65]*x5 + v[80]*x6 + v[95]*x7 + v[110]*x8;
859       sum7  += v[6]*x1 + v[21]*x2 + v[36]*x3  + v[51]*x4 + v[66]*x5 + v[81]*x6 + v[96]*x7 + v[111]*x8;
860       sum8  += v[7]*x1 + v[22]*x2  + v[37]*x3  + v[52]*x4 + v[67]*x5 + v[82]*x6 + v[97]*x7 + v[112]*x8;
861       sum9  += v[8]*x1 + v[23]*x2  + v[38]*x3  + v[53]*x4 + v[68]*x5 + v[83]*x6 + v[98]*x7 + v[113]*x8;
862       sum10 += v[9]*x1 + v[24]*x2  + v[39]*x3  + v[54]*x4 + v[69]*x5 + v[84]*x6 + v[99]*x7 + v[114]*x8;
863       sum11 += v[10]*x1 + v[25]*x2 + v[40]*x3  + v[55]*x4 + v[70]*x5 + v[85]*x6 + v[100]*x7 + v[115]*x8;
864       sum12 += v[11]*x1 + v[26]*x2 + v[41]*x3  + v[56]*x4 + v[71]*x5 + v[86]*x6 + v[101]*x7 + v[116]*x8;
865       sum13 += v[12]*x1 + v[27]*x2 + v[42]*x3  + v[57]*x4 + v[72]*x5 + v[87]*x6 + v[102]*x7 + v[117]*x8;
866       sum14 += v[13]*x1 + v[28]*x2 + v[43]*x3  + v[58]*x4 + v[73]*x5 + v[88]*x6 + v[103]*x7 + v[118]*x8;
867       sum15 += v[14]*x1 + v[29]*x2 + v[44]*x3  + v[59]*x4 + v[74]*x5 + v[89]*x6 + v[104]*x7 + v[119]*x8;
868       v += 120;
869 
870       x1 = xb[8]; x2 = xb[9]; x3 = xb[10]; x4 = xb[11]; x5 = xb[12]; x6 = xb[13]; x7 = xb[14];
871 
872       sum1  += v[0]*x1 + v[15]*x2  + v[30]*x3  + v[45]*x4 + v[60]*x5 + v[75]*x6 + v[90]*x7;
873       sum2  += v[1]*x1 + v[16]*x2  + v[31]*x3  + v[46]*x4 + v[61]*x5 + v[76]*x6 + v[91]*x7;
874       sum3  += v[2]*x1 + v[17]*x2  + v[32]*x3  + v[47]*x4 + v[62]*x5 + v[77]*x6 + v[92]*x7;
875       sum4  += v[3]*x1 + v[18]*x2 + v[33]*x3  + v[48]*x4 + v[63]*x5 + v[78]*x6 + v[93]*x7;
876       sum5  += v[4]*x1 + v[19]*x2 + v[34]*x3  + v[49]*x4 + v[64]*x5 + v[79]*x6 + v[94]*x7;
877       sum6  += v[5]*x1 + v[20]*x2 + v[35]*x3  + v[50]*x4 + v[65]*x5 + v[80]*x6 + v[95]*x7;
878       sum7  += v[6]*x1 + v[21]*x2 + v[36]*x3  + v[51]*x4 + v[66]*x5 + v[81]*x6 + v[96]*x7;
879       sum8  += v[7]*x1 + v[22]*x2  + v[37]*x3  + v[52]*x4 + v[67]*x5 + v[82]*x6 + v[97]*x7;
880       sum9  += v[8]*x1 + v[23]*x2  + v[38]*x3  + v[53]*x4 + v[68]*x5 + v[83]*x6 + v[98]*x7;
881       sum10 += v[9]*x1 + v[24]*x2  + v[39]*x3  + v[54]*x4 + v[69]*x5 + v[84]*x6 + v[99]*x7;
882       sum11 += v[10]*x1 + v[25]*x2 + v[40]*x3  + v[55]*x4 + v[70]*x5 + v[85]*x6 + v[100]*x7;
883       sum12 += v[11]*x1 + v[26]*x2 + v[41]*x3  + v[56]*x4 + v[71]*x5 + v[86]*x6 + v[101]*x7;
884       sum13 += v[12]*x1 + v[27]*x2 + v[42]*x3  + v[57]*x4 + v[72]*x5 + v[87]*x6 + v[102]*x7;
885       sum14 += v[13]*x1 + v[28]*x2 + v[43]*x3  + v[58]*x4 + v[73]*x5 + v[88]*x6 + v[103]*x7;
886       sum15 += v[14]*x1 + v[29]*x2 + v[44]*x3  + v[59]*x4 + v[74]*x5 + v[89]*x6 + v[104]*x7;
887       v += 105;
888     }
889     if (usecprow) z = zarray + 15*ridx[i];
890     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
891     z[7] = sum8; z[8] = sum9; z[9] = sum10; z[10] = sum11; z[11] = sum12; z[12] = sum13; z[13] = sum14;z[14] = sum15;
892 
893     if (!usecprow) z += 15;
894   }
895 
896   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
897   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
898   ierr = PetscLogFlops(450.0*a->nz - 15.0*nonzerorow);CHKERRQ(ierr);
899   PetscFunctionReturn(0);
900 }
901 
902 /* MatMult_SeqBAIJ_15_ver4 : All columns in the block are accessed at once */
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatMult_SeqBAIJ_15_ver4"
906 PetscErrorCode MatMult_SeqBAIJ_15_ver4(Mat A,Vec xx,Vec zz)
907 {
908   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
909   PetscScalar       *z = 0,sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9,sum10,sum11,sum12,sum13,sum14,sum15;
910   const PetscScalar *x,*xb;
911   PetscScalar        x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,*zarray;
912   const MatScalar   *v;
913   PetscErrorCode    ierr;
914   const PetscInt    *ii,*ij=a->j,*idx;
915   PetscInt          mbs,i,j,n,*ridx=PETSC_NULL,nonzerorow=0;
916   PetscBool         usecprow=a->compressedrow.use;
917 
918   PetscFunctionBegin;
919   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
920   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
921 
922   v   = a->a;
923   if (usecprow) {
924     mbs    = a->compressedrow.nrows;
925     ii     = a->compressedrow.i;
926     ridx = a->compressedrow.rindex;
927   } else {
928     mbs = a->mbs;
929     ii  = a->i;
930     z   = zarray;
931   }
932 
933   for (i=0; i<mbs; i++) {
934     n  = ii[i+1] - ii[i];
935     idx = ij + ii[i];
936     sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0;
937     sum8 = 0.0; sum9 = 0.0; sum10 = 0.0; sum11 = 0.0; sum12 = 0.0; sum13 = 0.0; sum14 = 0.0;sum15 = 0.0;
938 
939     nonzerorow += (n>0);
940     for (j=0; j<n; j++) {
941       xb = x + 15*(idx[j]);
942       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
943       x8 = xb[7]; x9 = xb[8]; x10 = xb[9]; x11 = xb[10]; x12 = xb[11]; x13 = xb[12]; x14 = xb[13];x15 = xb[14];
944 
945       sum1 +=  v[0]*x1  + v[15]*x2 + v[30]*x3 + v[45]*x4 + v[60]*x5 + v[75]*x6 + v[90]*x7  + v[105]*x8 + v[120]*x9 + v[135]*x10 + v[150]*x11 + v[165]*x12 + v[180]*x13 + v[195]*x14 + v[210]*x15;
946       sum2 +=  v[1]*x1  + v[16]*x2 + v[31]*x3 + v[46]*x4 + v[61]*x5 + v[76]*x6 + v[91]*x7  + v[106]*x8 + v[121]*x9 + v[136]*x10 + v[151]*x11 + v[166]*x12 + v[181]*x13 + v[196]*x14 + v[211]*x15;
947       sum3 +=  v[2]*x1  + v[17]*x2 + v[32]*x3 + v[47]*x4 + v[62]*x5 + v[77]*x6 + v[92]*x7  + v[107]*x8 + v[122]*x9 + v[137]*x10 + v[152]*x11 + v[167]*x12 + v[182]*x13 + v[197]*x14 + v[212]*x15;
948       sum4 +=  v[3]*x1  + v[18]*x2 + v[33]*x3 + v[48]*x4 + v[63]*x5 + v[78]*x6 + v[93]*x7  + v[108]*x8 + v[123]*x9 + v[138]*x10 + v[153]*x11 + v[168]*x12 + v[183]*x13 + v[198]*x14 + v[213]*x15;
949       sum5  += v[4]*x1  + v[19]*x2 + v[34]*x3 + v[49]*x4 + v[64]*x5 + v[79]*x6 + v[94]*x7  + v[109]*x8 + v[124]*x9 + v[139]*x10 + v[154]*x11 + v[169]*x12 + v[184]*x13 + v[199]*x14 + v[214]*x15;
950       sum6  += v[5]*x1  + v[20]*x2 + v[35]*x3 + v[50]*x4 + v[65]*x5 + v[80]*x6 + v[95]*x7  + v[110]*x8 + v[125]*x9 + v[140]*x10 + v[155]*x11 + v[170]*x12 + v[185]*x13 + v[200]*x14 + v[215]*x15;
951       sum7  += v[6]*x1  + v[21]*x2 + v[36]*x3 + v[51]*x4 + v[66]*x5 + v[81]*x6 + v[96]*x7  + v[111]*x8 + v[126]*x9 + v[141]*x10 + v[156]*x11 + v[171]*x12 + v[186]*x13 + v[201]*x14 + v[216]*x15;
952       sum8  += v[7]*x1  + v[22]*x2 + v[37]*x3 + v[52]*x4 + v[67]*x5 + v[82]*x6 + v[97]*x7  + v[112]*x8 + v[127]*x9 + v[142]*x10 + v[157]*x11 + v[172]*x12 + v[187]*x13 + v[202]*x14 + v[217]*x15;
953       sum9  += v[8]*x1  + v[23]*x2 + v[38]*x3 + v[53]*x4 + v[68]*x5 + v[83]*x6 + v[98]*x7  + v[113]*x8 + v[128]*x9 + v[143]*x10 + v[158]*x11 + v[173]*x12 + v[188]*x13 + v[203]*x14 + v[218]*x15;
954       sum10 += v[9]*x1  + v[24]*x2 + v[39]*x3 + v[54]*x4 + v[69]*x5 + v[84]*x6 + v[99]*x7  + v[114]*x8 + v[129]*x9 + v[144]*x10 + v[159]*x11 + v[174]*x12 + v[189]*x13 + v[204]*x14 + v[219]*x15;
955       sum11 += v[10]*x1 + v[25]*x2 + v[40]*x3 + v[55]*x4 + v[70]*x5 + v[85]*x6 + v[100]*x7 + v[115]*x8 + v[130]*x9 + v[145]*x10 + v[160]*x11 + v[175]*x12 + v[190]*x13 + v[205]*x14 + v[220]*x15;
956       sum12 += v[11]*x1 + v[26]*x2 + v[41]*x3 + v[56]*x4 + v[71]*x5 + v[86]*x6 + v[101]*x7 + v[116]*x8 + v[131]*x9 + v[146]*x10 + v[161]*x11 + v[176]*x12 + v[191]*x13 + v[206]*x14 + v[221]*x15;
957       sum13 += v[12]*x1 + v[27]*x2 + v[42]*x3 + v[57]*x4 + v[72]*x5 + v[87]*x6 + v[102]*x7 + v[117]*x8 + v[132]*x9 + v[147]*x10 + v[162]*x11 + v[177]*x12 + v[192]*x13 + v[207]*x14 + v[222]*x15;
958       sum14 += v[13]*x1 + v[28]*x2 + v[43]*x3 + v[58]*x4 + v[73]*x5 + v[88]*x6 + v[103]*x7 + v[118]*x8 + v[133]*x9 + v[148]*x10 + v[163]*x11 + v[178]*x12 + v[193]*x13 + v[208]*x14 + v[223]*x15;
959       sum15 += v[14]*x1 + v[29]*x2 + v[44]*x3 + v[59]*x4 + v[74]*x5 + v[89]*x6 + v[104]*x7 + v[119]*x8 + v[134]*x9 + v[149]*x10 + v[164]*x11 + v[179]*x12 + v[194]*x13 + v[209]*x14 + v[224]*x15;
960       v += 225;
961     }
962     if (usecprow) z = zarray + 15*ridx[i];
963     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
964     z[7] = sum8; z[8] = sum9; z[9] = sum10; z[10] = sum11; z[11] = sum12; z[12] = sum13; z[13] = sum14;z[14] = sum15;
965 
966     if (!usecprow) z += 15;
967   }
968 
969   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
970   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
971   ierr = PetscLogFlops(450.0*a->nz - 15.0*nonzerorow);CHKERRQ(ierr);
972   PetscFunctionReturn(0);
973 }
974 
975 
976 /*
977     This will not work with MatScalar == float because it calls the BLAS
978 */
979 #undef __FUNCT__
980 #define __FUNCT__ "MatMult_SeqBAIJ_N"
981 PetscErrorCode MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz)
982 {
983   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
984   PetscScalar    *x,*z = 0,*xb,*work,*workt,*zarray;
985   MatScalar      *v;
986   PetscErrorCode ierr;
987   PetscInt       mbs,i,*idx,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2;
988   PetscInt       ncols,k,*ridx=PETSC_NULL,nonzerorow=0;
989   PetscBool      usecprow=a->compressedrow.use;
990 
991   PetscFunctionBegin;
992   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
993   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
994 
995   idx = a->j;
996   v   = a->a;
997   if (usecprow) {
998     mbs  = a->compressedrow.nrows;
999     ii   = a->compressedrow.i;
1000     ridx = a->compressedrow.rindex;
1001   } else {
1002     mbs = a->mbs;
1003     ii  = a->i;
1004     z   = zarray;
1005   }
1006 
1007   if (!a->mult_work) {
1008     k    = PetscMax(A->rmap->n,A->cmap->n);
1009     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1010   }
1011   work = a->mult_work;
1012   for (i=0; i<mbs; i++) {
1013     n     = ii[1] - ii[0]; ii++;
1014     ncols = n*bs;
1015     workt = work;
1016     nonzerorow += (n>0);
1017     for (j=0; j<n; j++) {
1018       xb = x + bs*(*idx++);
1019       for (k=0; k<bs; k++) workt[k] = xb[k];
1020       workt += bs;
1021     }
1022     if (usecprow) z = zarray + bs*ridx[i];
1023     PetscKernel_w_gets_Ar_times_v(bs,ncols,work,v,z);
1024     /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); */
1025     v += n*bs2;
1026     if (!usecprow) z += bs;
1027   }
1028   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1029   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1030   ierr = PetscLogFlops(2.0*a->nz*bs2 - bs*nonzerorow);CHKERRQ(ierr);
1031   PetscFunctionReturn(0);
1032 }
1033 
1034 #undef __FUNCT__
1035 #define __FUNCT__ "MatMultAdd_SeqBAIJ_1"
1036 PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
1037 {
1038   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
1039   const PetscScalar  *x;
1040   PetscScalar        *y,*z,sum;
1041   const MatScalar    *v;
1042   PetscErrorCode     ierr;
1043   PetscInt           mbs=a->mbs,i,n,*ridx=PETSC_NULL,nonzerorow=0;
1044   const PetscInt     *idx,*ii;
1045   PetscBool          usecprow=a->compressedrow.use;
1046 
1047   PetscFunctionBegin;
1048   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1049   ierr = VecGetArray(yy,&y);CHKERRQ(ierr);
1050   if (zz != yy) {
1051     ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
1052   } else {
1053     z = y;
1054   }
1055 
1056   idx = a->j;
1057   v   = a->a;
1058   if (usecprow) {
1059     if (zz != yy) {
1060       ierr = PetscMemcpy(z,y,mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1061     }
1062     mbs  = a->compressedrow.nrows;
1063     ii   = a->compressedrow.i;
1064     ridx = a->compressedrow.rindex;
1065   } else {
1066     ii  = a->i;
1067   }
1068 
1069   for (i=0; i<mbs; i++) {
1070     n    = ii[1] - ii[0];
1071     ii++;
1072     if (!usecprow) {
1073       nonzerorow += (n>0);
1074       sum = y[i];
1075     } else {
1076       sum = y[ridx[i]];
1077     }
1078     PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA); /* Indices for the next row (assumes same size as this one) */
1079     PetscPrefetchBlock(v+n,n,0,PETSC_PREFETCH_HINT_NTA);   /* Entries for the next row */
1080     PetscSparseDensePlusDot(sum,x,v,idx,n);
1081     v += n;
1082     idx += n;
1083     if (usecprow) {
1084       z[ridx[i]] = sum;
1085     } else {
1086       z[i] = sum;
1087     }
1088   }
1089   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1090   ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
1091   if (zz != yy) {
1092     ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
1093   }
1094   ierr = PetscLogFlops(2.0*a->nz - nonzerorow);CHKERRQ(ierr);
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 #undef __FUNCT__
1099 #define __FUNCT__ "MatMultAdd_SeqBAIJ_2"
1100 PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
1101 {
1102   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1103   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2;
1104   PetscScalar    x1,x2,*yarray,*zarray;
1105   MatScalar      *v;
1106   PetscErrorCode ierr;
1107   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
1108   PetscBool      usecprow=a->compressedrow.use;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1112   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
1113   if (zz != yy) {
1114     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
1115   } else {
1116     zarray = yarray;
1117   }
1118 
1119   idx = a->j;
1120   v   = a->a;
1121   if (usecprow) {
1122     if (zz != yy) {
1123       ierr = PetscMemcpy(zarray,yarray,2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1124     }
1125     mbs  = a->compressedrow.nrows;
1126     ii   = a->compressedrow.i;
1127     ridx = a->compressedrow.rindex;
1128     if (zz != yy) {
1129       ierr = PetscMemcpy(zarray,yarray,a->mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1130     }
1131   } else {
1132     ii  = a->i;
1133     y   = yarray;
1134     z   = zarray;
1135   }
1136 
1137   for (i=0; i<mbs; i++) {
1138     n  = ii[1] - ii[0]; ii++;
1139     if (usecprow) {
1140       z = zarray + 2*ridx[i];
1141       y = yarray + 2*ridx[i];
1142     }
1143     sum1 = y[0]; sum2 = y[1];
1144     PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA);   /* Indices for the next row (assumes same size as this one) */
1145     PetscPrefetchBlock(v+4*n,4*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
1146     for (j=0; j<n; j++) {
1147       xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1];
1148       sum1 += v[0]*x1 + v[2]*x2;
1149       sum2 += v[1]*x1 + v[3]*x2;
1150       v += 4;
1151     }
1152     z[0] = sum1; z[1] = sum2;
1153     if (!usecprow) {
1154       z += 2; y += 2;
1155     }
1156   }
1157   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1158   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
1159   if (zz != yy) {
1160     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1161   }
1162   ierr = PetscLogFlops(4.0*a->nz);CHKERRQ(ierr);
1163   PetscFunctionReturn(0);
1164 }
1165 
1166 #undef __FUNCT__
1167 #define __FUNCT__ "MatMultAdd_SeqBAIJ_3"
1168 PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
1169 {
1170   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1171   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,x1,x2,x3,*yarray,*zarray;
1172   MatScalar      *v;
1173   PetscErrorCode ierr;
1174   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
1175   PetscBool      usecprow=a->compressedrow.use;
1176 
1177   PetscFunctionBegin;
1178   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1179   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
1180   if (zz != yy) {
1181     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
1182   } else {
1183     zarray = yarray;
1184   }
1185 
1186   idx = a->j;
1187   v   = a->a;
1188   if (usecprow) {
1189     if (zz != yy) {
1190       ierr = PetscMemcpy(zarray,yarray,3*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1191     }
1192     mbs  = a->compressedrow.nrows;
1193     ii   = a->compressedrow.i;
1194     ridx = a->compressedrow.rindex;
1195   } else {
1196     ii  = a->i;
1197     y   = yarray;
1198     z   = zarray;
1199   }
1200 
1201   for (i=0; i<mbs; i++) {
1202     n  = ii[1] - ii[0]; ii++;
1203     if (usecprow) {
1204       z = zarray + 3*ridx[i];
1205       y = yarray + 3*ridx[i];
1206     }
1207     sum1 = y[0]; sum2 = y[1]; sum3 = y[2];
1208     PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA);   /* Indices for the next row (assumes same size as this one) */
1209     PetscPrefetchBlock(v+9*n,9*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
1210     for (j=0; j<n; j++) {
1211       xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1212       sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3;
1213       sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3;
1214       sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3;
1215       v += 9;
1216     }
1217     z[0] = sum1; z[1] = sum2; z[2] = sum3;
1218     if (!usecprow) {
1219       z += 3; y += 3;
1220     }
1221   }
1222   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1223   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
1224   if (zz != yy) {
1225     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1226   }
1227   ierr = PetscLogFlops(18.0*a->nz);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatMultAdd_SeqBAIJ_4"
1233 PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
1234 {
1235   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1236   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*yarray,*zarray;
1237   MatScalar      *v;
1238   PetscErrorCode ierr;
1239   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
1240   PetscBool      usecprow=a->compressedrow.use;
1241 
1242   PetscFunctionBegin;
1243   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1244   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
1245   if (zz != yy) {
1246     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
1247   } else {
1248     zarray = yarray;
1249   }
1250 
1251   idx   = a->j;
1252   v     = a->a;
1253   if (usecprow) {
1254     if (zz != yy) {
1255       ierr = PetscMemcpy(zarray,yarray,4*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1256     }
1257     mbs  = a->compressedrow.nrows;
1258     ii   = a->compressedrow.i;
1259     ridx = a->compressedrow.rindex;
1260   } else {
1261     ii  = a->i;
1262     y   = yarray;
1263     z   = zarray;
1264   }
1265 
1266   for (i=0; i<mbs; i++) {
1267     n  = ii[1] - ii[0]; ii++;
1268     if (usecprow) {
1269       z = zarray + 4*ridx[i];
1270       y = yarray + 4*ridx[i];
1271     }
1272     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3];
1273     PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA);     /* Indices for the next row (assumes same size as this one) */
1274     PetscPrefetchBlock(v+16*n,16*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
1275     for (j=0; j<n; j++) {
1276       xb = x + 4*(*idx++);
1277       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1278       sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3   + v[12]*x4;
1279       sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3   + v[13]*x4;
1280       sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3  + v[14]*x4;
1281       sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3  + v[15]*x4;
1282       v += 16;
1283     }
1284     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
1285     if (!usecprow) {
1286       z += 4; y += 4;
1287     }
1288   }
1289   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1290   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
1291   if (zz != yy) {
1292     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1293   }
1294   ierr = PetscLogFlops(32.0*a->nz);CHKERRQ(ierr);
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 #undef __FUNCT__
1299 #define __FUNCT__ "MatMultAdd_SeqBAIJ_5"
1300 PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
1301 {
1302   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1303   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5;
1304   PetscScalar    *yarray,*zarray;
1305   MatScalar      *v;
1306   PetscErrorCode ierr;
1307   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
1308   PetscBool      usecprow=a->compressedrow.use;
1309 
1310   PetscFunctionBegin;
1311   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1312   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
1313   if (zz != yy) {
1314     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
1315   } else {
1316     zarray = yarray;
1317   }
1318 
1319   idx = a->j;
1320   v   = a->a;
1321   if (usecprow) {
1322     if (zz != yy) {
1323       ierr = PetscMemcpy(zarray,yarray,5*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1324     }
1325     mbs  = a->compressedrow.nrows;
1326     ii   = a->compressedrow.i;
1327     ridx = a->compressedrow.rindex;
1328   } else {
1329     ii  = a->i;
1330     y   = yarray;
1331     z   = zarray;
1332   }
1333 
1334   for (i=0; i<mbs; i++) {
1335     n  = ii[1] - ii[0]; ii++;
1336     if (usecprow) {
1337       z = zarray + 5*ridx[i];
1338       y = yarray + 5*ridx[i];
1339     }
1340     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4];
1341     PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA);     /* Indices for the next row (assumes same size as this one) */
1342     PetscPrefetchBlock(v+25*n,25*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
1343     for (j=0; j<n; j++) {
1344       xb = x + 5*(*idx++);
1345       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4];
1346       sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3  + v[15]*x4 + v[20]*x5;
1347       sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3  + v[16]*x4 + v[21]*x5;
1348       sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3  + v[17]*x4 + v[22]*x5;
1349       sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3  + v[18]*x4 + v[23]*x5;
1350       sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3  + v[19]*x4 + v[24]*x5;
1351       v += 25;
1352     }
1353     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5;
1354     if (!usecprow) {
1355       z += 5; y += 5;
1356     }
1357   }
1358   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1359   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
1360   if (zz != yy) {
1361     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1362   }
1363   ierr = PetscLogFlops(50.0*a->nz);CHKERRQ(ierr);
1364   PetscFunctionReturn(0);
1365 }
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatMultAdd_SeqBAIJ_6"
1368 PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
1369 {
1370   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1371   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6;
1372   PetscScalar    x1,x2,x3,x4,x5,x6,*yarray,*zarray;
1373   MatScalar      *v;
1374   PetscErrorCode ierr;
1375   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
1376   PetscBool      usecprow=a->compressedrow.use;
1377 
1378   PetscFunctionBegin;
1379   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1380   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
1381   if (zz != yy) {
1382     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
1383   } else {
1384     zarray = yarray;
1385   }
1386 
1387   idx = a->j;
1388   v   = a->a;
1389   if (usecprow) {
1390     if (zz != yy) {
1391       ierr = PetscMemcpy(zarray,yarray,6*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1392     }
1393     mbs  = a->compressedrow.nrows;
1394     ii   = a->compressedrow.i;
1395     ridx = a->compressedrow.rindex;
1396   } else {
1397     ii  = a->i;
1398     y   = yarray;
1399     z   = zarray;
1400   }
1401 
1402   for (i=0; i<mbs; i++) {
1403     n  = ii[1] - ii[0]; ii++;
1404     if (usecprow) {
1405       z = zarray + 6*ridx[i];
1406       y = yarray + 6*ridx[i];
1407     }
1408     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5];
1409     PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA);     /* Indices for the next row (assumes same size as this one) */
1410     PetscPrefetchBlock(v+36*n,36*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
1411     for (j=0; j<n; j++) {
1412       xb = x + 6*(*idx++);
1413       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5];
1414       sum1 += v[0]*x1 + v[6]*x2  + v[12]*x3  + v[18]*x4 + v[24]*x5 + v[30]*x6;
1415       sum2 += v[1]*x1 + v[7]*x2  + v[13]*x3  + v[19]*x4 + v[25]*x5 + v[31]*x6;
1416       sum3 += v[2]*x1 + v[8]*x2  + v[14]*x3  + v[20]*x4 + v[26]*x5 + v[32]*x6;
1417       sum4 += v[3]*x1 + v[9]*x2  + v[15]*x3  + v[21]*x4 + v[27]*x5 + v[33]*x6;
1418       sum5 += v[4]*x1 + v[10]*x2 + v[16]*x3  + v[22]*x4 + v[28]*x5 + v[34]*x6;
1419       sum6 += v[5]*x1 + v[11]*x2 + v[17]*x3  + v[23]*x4 + v[29]*x5 + v[35]*x6;
1420       v += 36;
1421     }
1422     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6;
1423     if (!usecprow) {
1424       z += 6; y += 6;
1425     }
1426   }
1427   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1428   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
1429   if (zz != yy) {
1430     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1431   }
1432   ierr = PetscLogFlops(72.0*a->nz);CHKERRQ(ierr);
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatMultAdd_SeqBAIJ_7"
1438 PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1439 {
1440   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1441   PetscScalar    *x,*y = 0,*z = 0,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7;
1442   PetscScalar    x1,x2,x3,x4,x5,x6,x7,*yarray,*zarray;
1443   MatScalar      *v;
1444   PetscErrorCode ierr;
1445   PetscInt       mbs=a->mbs,i,*idx,*ii,j,n,*ridx=PETSC_NULL;
1446   PetscBool      usecprow=a->compressedrow.use;
1447 
1448   PetscFunctionBegin;
1449   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1450   ierr = VecGetArray(yy,&yarray);CHKERRQ(ierr);
1451   if (zz != yy) {
1452     ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
1453   } else {
1454     zarray = yarray;
1455   }
1456 
1457   idx = a->j;
1458   v   = a->a;
1459   if (usecprow) {
1460     if (zz != yy) {
1461       ierr = PetscMemcpy(zarray,yarray,7*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1462     }
1463     mbs  = a->compressedrow.nrows;
1464     ii   = a->compressedrow.i;
1465     ridx = a->compressedrow.rindex;
1466   } else {
1467     ii  = a->i;
1468     y   = yarray;
1469     z   = zarray;
1470   }
1471 
1472   for (i=0; i<mbs; i++) {
1473     n  = ii[1] - ii[0]; ii++;
1474     if (usecprow) {
1475       z = zarray + 7*ridx[i];
1476       y = yarray + 7*ridx[i];
1477     }
1478     sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6];
1479     PetscPrefetchBlock(idx+n,n,0,PETSC_PREFETCH_HINT_NTA);     /* Indices for the next row (assumes same size as this one) */
1480     PetscPrefetchBlock(v+49*n,49*n,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
1481     for (j=0; j<n; j++) {
1482       xb = x + 7*(*idx++);
1483       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6];
1484       sum1 += v[0]*x1 + v[7]*x2  + v[14]*x3  + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
1485       sum2 += v[1]*x1 + v[8]*x2  + v[15]*x3  + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
1486       sum3 += v[2]*x1 + v[9]*x2  + v[16]*x3  + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
1487       sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3  + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
1488       sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3  + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
1489       sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3  + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
1490       sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3  + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
1491       v += 49;
1492     }
1493     z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7;
1494     if (!usecprow) {
1495       z += 7; y += 7;
1496     }
1497   }
1498   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1499   ierr = VecRestoreArray(yy,&yarray);CHKERRQ(ierr);
1500   if (zz != yy) {
1501     ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1502   }
1503   ierr = PetscLogFlops(98.0*a->nz);CHKERRQ(ierr);
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 #undef __FUNCT__
1508 #define __FUNCT__ "MatMultAdd_SeqBAIJ_N"
1509 PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1510 {
1511   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1512   PetscScalar    *x,*z = 0,*xb,*work,*workt,*zarray;
1513   MatScalar      *v;
1514   PetscErrorCode ierr;
1515   PetscInt       mbs,i,*idx,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2;
1516   PetscInt       ncols,k,*ridx=PETSC_NULL;
1517   PetscBool      usecprow=a->compressedrow.use;
1518 
1519   PetscFunctionBegin;
1520   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
1521   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1522   ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);
1523 
1524   idx = a->j;
1525   v   = a->a;
1526   if (usecprow) {
1527     mbs    = a->compressedrow.nrows;
1528     ii     = a->compressedrow.i;
1529     ridx = a->compressedrow.rindex;
1530   } else {
1531     mbs = a->mbs;
1532     ii  = a->i;
1533     z   = zarray;
1534   }
1535 
1536   if (!a->mult_work) {
1537     k    = PetscMax(A->rmap->n,A->cmap->n);
1538     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1539   }
1540   work = a->mult_work;
1541   for (i=0; i<mbs; i++) {
1542     n     = ii[1] - ii[0]; ii++;
1543     ncols = n*bs;
1544     workt = work;
1545     for (j=0; j<n; j++) {
1546       xb = x + bs*(*idx++);
1547       for (k=0; k<bs; k++) workt[k] = xb[k];
1548       workt += bs;
1549     }
1550     if (usecprow) z = zarray + bs*ridx[i];
1551     PetscKernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
1552     /* BLASgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); */
1553     v += n*bs2;
1554     if (!usecprow) {
1555       z += bs;
1556     }
1557   }
1558   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1559   ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
1560   ierr = PetscLogFlops(2.0*a->nz*bs2);CHKERRQ(ierr);
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 #undef __FUNCT__
1565 #define __FUNCT__ "MatMultHermitianTranspose_SeqBAIJ"
1566 PetscErrorCode MatMultHermitianTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz)
1567 {
1568   PetscScalar    zero = 0.0;
1569   PetscErrorCode ierr;
1570 
1571   PetscFunctionBegin;
1572   ierr = VecSet(zz,zero);CHKERRQ(ierr);
1573   ierr = MatMultHermitianTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr);
1574   PetscFunctionReturn(0);
1575 }
1576 
1577 #undef __FUNCT__
1578 #define __FUNCT__ "MatMultTranspose_SeqBAIJ"
1579 PetscErrorCode MatMultTranspose_SeqBAIJ(Mat A,Vec xx,Vec zz)
1580 {
1581   PetscScalar    zero = 0.0;
1582   PetscErrorCode ierr;
1583 
1584   PetscFunctionBegin;
1585   ierr = VecSet(zz,zero);CHKERRQ(ierr);
1586   ierr = MatMultTransposeAdd_SeqBAIJ(A,xx,zz,zz);CHKERRQ(ierr);
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatMultHermitianTransposeAdd_SeqBAIJ"
1592 PetscErrorCode MatMultHermitianTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1593 {
1594   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1595   PetscScalar       *zb,*x,*z,*xb = 0,x1,x2,x3,x4,x5;
1596   MatScalar         *v;
1597   PetscErrorCode    ierr;
1598   PetscInt          mbs,i,*idx,*ii,rval,bs=A->rmap->bs,j,n,bs2=a->bs2,*ib,*ridx=PETSC_NULL;
1599   Mat_CompressedRow cprow = a->compressedrow;
1600   PetscBool         usecprow=cprow.use;
1601 
1602   PetscFunctionBegin;
1603   if (yy != zz) { ierr = VecCopy(yy,zz);CHKERRQ(ierr); }
1604   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1605   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
1606 
1607   idx = a->j;
1608   v   = a->a;
1609   if (usecprow) {
1610     mbs  = cprow.nrows;
1611     ii   = cprow.i;
1612     ridx = cprow.rindex;
1613   } else {
1614     mbs=a->mbs;
1615     ii = a->i;
1616     xb = x;
1617   }
1618 
1619   switch (bs) {
1620   case 1:
1621     for (i=0; i<mbs; i++) {
1622       if (usecprow) xb = x + ridx[i];
1623       x1 = xb[0];
1624       ib = idx + ii[0];
1625       n  = ii[1] - ii[0]; ii++;
1626       for (j=0; j<n; j++) {
1627         rval    = ib[j];
1628         z[rval] += PetscConj(*v) * x1;
1629         v++;
1630       }
1631       if (!usecprow) xb++;
1632     }
1633     break;
1634   case 2:
1635     for (i=0; i<mbs; i++) {
1636       if (usecprow) xb = x + 2*ridx[i];
1637       x1 = xb[0]; x2 = xb[1];
1638       ib = idx + ii[0];
1639       n  = ii[1] - ii[0]; ii++;
1640       for (j=0; j<n; j++) {
1641         rval      = ib[j]*2;
1642         z[rval++] += PetscConj(v[0])*x1 + PetscConj(v[1])*x2;
1643         z[rval++] += PetscConj(v[2])*x1 + PetscConj(v[3])*x2;
1644         v  += 4;
1645       }
1646       if (!usecprow) xb += 2;
1647     }
1648     break;
1649   case 3:
1650     for (i=0; i<mbs; i++) {
1651       if (usecprow) xb = x + 3*ridx[i];
1652       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1653       ib = idx + ii[0];
1654       n  = ii[1] - ii[0]; ii++;
1655       for (j=0; j<n; j++) {
1656         rval      = ib[j]*3;
1657         z[rval++] += PetscConj(v[0])*x1 + PetscConj(v[1])*x2 + PetscConj(v[2])*x3;
1658         z[rval++] += PetscConj(v[3])*x1 + PetscConj(v[4])*x2 + PetscConj(v[5])*x3;
1659         z[rval++] += PetscConj(v[6])*x1 + PetscConj(v[7])*x2 + PetscConj(v[8])*x3;
1660         v  += 9;
1661       }
1662       if (!usecprow) xb += 3;
1663     }
1664     break;
1665   case 4:
1666     for (i=0; i<mbs; i++) {
1667       if (usecprow) xb = x + 4*ridx[i];
1668       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1669       ib = idx + ii[0];
1670       n  = ii[1] - ii[0]; ii++;
1671       for (j=0; j<n; j++) {
1672         rval      = ib[j]*4;
1673         z[rval++] +=  PetscConj(v[0])*x1 + PetscConj(v[1])*x2  + PetscConj(v[2])*x3  + PetscConj(v[3])*x4;
1674         z[rval++] +=  PetscConj(v[4])*x1 + PetscConj(v[5])*x2  + PetscConj(v[6])*x3  + PetscConj(v[7])*x4;
1675         z[rval++] +=  PetscConj(v[8])*x1 + PetscConj(v[9])*x2  + PetscConj(v[10])*x3 + PetscConj(v[11])*x4;
1676         z[rval++] += PetscConj(v[12])*x1 + PetscConj(v[13])*x2 + PetscConj(v[14])*x3 + PetscConj(v[15])*x4;
1677         v  += 16;
1678       }
1679       if (!usecprow) xb += 4;
1680     }
1681     break;
1682   case 5:
1683     for (i=0; i<mbs; i++) {
1684       if (usecprow) xb = x + 5*ridx[i];
1685       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1686       x4 = xb[3]; x5 = xb[4];
1687       ib = idx + ii[0];
1688       n  = ii[1] - ii[0]; ii++;
1689       for (j=0; j<n; j++) {
1690         rval      = ib[j]*5;
1691         z[rval++] +=  PetscConj(v[0])*x1 +  PetscConj(v[1])*x2 +  PetscConj(v[2])*x3 +  PetscConj(v[3])*x4 +  PetscConj(v[4])*x5;
1692         z[rval++] +=  PetscConj(v[5])*x1 +  PetscConj(v[6])*x2 +  PetscConj(v[7])*x3 +  PetscConj(v[8])*x4 +  PetscConj(v[9])*x5;
1693         z[rval++] += PetscConj(v[10])*x1 + PetscConj(v[11])*x2 + PetscConj(v[12])*x3 + PetscConj(v[13])*x4 + PetscConj(v[14])*x5;
1694         z[rval++] += PetscConj(v[15])*x1 + PetscConj(v[16])*x2 + PetscConj(v[17])*x3 + PetscConj(v[18])*x4 + PetscConj(v[19])*x5;
1695         z[rval++] += PetscConj(v[20])*x1 + PetscConj(v[21])*x2 + PetscConj(v[22])*x3 + PetscConj(v[23])*x4 + PetscConj(v[24])*x5;
1696         v  += 25;
1697       }
1698       if (!usecprow) xb += 5;
1699     }
1700     break;
1701   default: {      /* block sizes larger than 5 by 5 are handled by BLAS */
1702       PetscInt     ncols,k;
1703       PetscScalar  *work,*workt,*xtmp;
1704 
1705       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size larger than 5 is not supported yet");
1706       if (!a->mult_work) {
1707         k = PetscMax(A->rmap->n,A->cmap->n);
1708         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1709       }
1710       work = a->mult_work;
1711       xtmp = x;
1712       for (i=0; i<mbs; i++) {
1713         n     = ii[1] - ii[0]; ii++;
1714         ncols = n*bs;
1715         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1716         if (usecprow) {
1717           xtmp = x + bs*ridx[i];
1718         }
1719         PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work);
1720         /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */
1721         v += n*bs2;
1722         if (!usecprow) xtmp += bs;
1723         workt = work;
1724         for (j=0; j<n; j++) {
1725           zb = z + bs*(*idx++);
1726           for (k=0; k<bs; k++) zb[k] += workt[k] ;
1727           workt += bs;
1728         }
1729       }
1730     }
1731   }
1732   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1733   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
1734   ierr = PetscLogFlops(2.0*a->nz*a->bs2);CHKERRQ(ierr);
1735   PetscFunctionReturn(0);
1736 }
1737 
1738 #undef __FUNCT__
1739 #define __FUNCT__ "MatMultTransposeAdd_SeqBAIJ"
1740 PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1741 {
1742   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1743   PetscScalar       *zb,*x,*z,*xb = 0,x1,x2,x3,x4,x5;
1744   MatScalar         *v;
1745   PetscErrorCode    ierr;
1746   PetscInt          mbs,i,*idx,*ii,rval,bs=A->rmap->bs,j,n,bs2=a->bs2,*ib,*ridx=PETSC_NULL;
1747   Mat_CompressedRow cprow = a->compressedrow;
1748   PetscBool         usecprow=cprow.use;
1749 
1750   PetscFunctionBegin;
1751   if (yy != zz) { ierr = VecCopy(yy,zz);CHKERRQ(ierr); }
1752   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1753   ierr = VecGetArray(zz,&z);CHKERRQ(ierr);
1754 
1755   idx = a->j;
1756   v   = a->a;
1757   if (usecprow) {
1758     mbs  = cprow.nrows;
1759     ii   = cprow.i;
1760     ridx = cprow.rindex;
1761   } else {
1762     mbs=a->mbs;
1763     ii = a->i;
1764     xb = x;
1765   }
1766 
1767   switch (bs) {
1768   case 1:
1769     for (i=0; i<mbs; i++) {
1770       if (usecprow) xb = x + ridx[i];
1771       x1 = xb[0];
1772       ib = idx + ii[0];
1773       n  = ii[1] - ii[0]; ii++;
1774       for (j=0; j<n; j++) {
1775         rval    = ib[j];
1776         z[rval] += *v * x1;
1777         v++;
1778       }
1779       if (!usecprow) xb++;
1780     }
1781     break;
1782   case 2:
1783     for (i=0; i<mbs; i++) {
1784       if (usecprow) xb = x + 2*ridx[i];
1785       x1 = xb[0]; x2 = xb[1];
1786       ib = idx + ii[0];
1787       n  = ii[1] - ii[0]; ii++;
1788       for (j=0; j<n; j++) {
1789         rval      = ib[j]*2;
1790         z[rval++] += v[0]*x1 + v[1]*x2;
1791         z[rval++] += v[2]*x1 + v[3]*x2;
1792         v  += 4;
1793       }
1794       if (!usecprow) xb += 2;
1795     }
1796     break;
1797   case 3:
1798     for (i=0; i<mbs; i++) {
1799       if (usecprow) xb = x + 3*ridx[i];
1800       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1801       ib = idx + ii[0];
1802       n  = ii[1] - ii[0]; ii++;
1803       for (j=0; j<n; j++) {
1804         rval      = ib[j]*3;
1805         z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3;
1806         z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3;
1807         z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3;
1808         v  += 9;
1809       }
1810       if (!usecprow) xb += 3;
1811     }
1812     break;
1813   case 4:
1814     for (i=0; i<mbs; i++) {
1815       if (usecprow) xb = x + 4*ridx[i];
1816       x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
1817       ib = idx + ii[0];
1818       n  = ii[1] - ii[0]; ii++;
1819       for (j=0; j<n; j++) {
1820         rval      = ib[j]*4;
1821         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4;
1822         z[rval++] +=  v[4]*x1 +  v[5]*x2 +  v[6]*x3 +  v[7]*x4;
1823         z[rval++] +=  v[8]*x1 +  v[9]*x2 + v[10]*x3 + v[11]*x4;
1824         z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
1825         v  += 16;
1826       }
1827       if (!usecprow) xb += 4;
1828     }
1829     break;
1830   case 5:
1831     for (i=0; i<mbs; i++) {
1832       if (usecprow) xb = x + 5*ridx[i];
1833       x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
1834       x4 = xb[3]; x5 = xb[4];
1835       ib = idx + ii[0];
1836       n  = ii[1] - ii[0]; ii++;
1837       for (j=0; j<n; j++) {
1838         rval      = ib[j]*5;
1839         z[rval++] +=  v[0]*x1 +  v[1]*x2 +  v[2]*x3 +  v[3]*x4 +  v[4]*x5;
1840         z[rval++] +=  v[5]*x1 +  v[6]*x2 +  v[7]*x3 +  v[8]*x4 +  v[9]*x5;
1841         z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5;
1842         z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5;
1843         z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5;
1844         v  += 25;
1845       }
1846       if (!usecprow) xb += 5;
1847     }
1848     break;
1849   default: {      /* block sizes larger then 5 by 5 are handled by BLAS */
1850       PetscInt     ncols,k;
1851       PetscScalar  *work,*workt,*xtmp;
1852 
1853       if (!a->mult_work) {
1854         k = PetscMax(A->rmap->n,A->cmap->n);
1855         ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1856       }
1857       work = a->mult_work;
1858       xtmp = x;
1859       for (i=0; i<mbs; i++) {
1860         n     = ii[1] - ii[0]; ii++;
1861         ncols = n*bs;
1862         ierr  = PetscMemzero(work,ncols*sizeof(PetscScalar));CHKERRQ(ierr);
1863         if (usecprow) {
1864           xtmp = x + bs*ridx[i];
1865         }
1866         PetscKernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,xtmp,v,work);
1867         /* BLASgemv_("T",&bs,&ncols,&_DOne,v,&bs,xtmp,&_One,&_DOne,work,&_One); */
1868         v += n*bs2;
1869         if (!usecprow) xtmp += bs;
1870         workt = work;
1871         for (j=0; j<n; j++) {
1872           zb = z + bs*(*idx++);
1873           for (k=0; k<bs; k++) zb[k] += workt[k] ;
1874           workt += bs;
1875         }
1876       }
1877     }
1878   }
1879   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1880   ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
1881   ierr = PetscLogFlops(2.0*a->nz*a->bs2);CHKERRQ(ierr);
1882   PetscFunctionReturn(0);
1883 }
1884 
1885 #undef __FUNCT__
1886 #define __FUNCT__ "MatScale_SeqBAIJ"
1887 PetscErrorCode MatScale_SeqBAIJ(Mat inA,PetscScalar alpha)
1888 {
1889   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1890   PetscInt       totalnz = a->bs2*a->nz;
1891   PetscScalar    oalpha = alpha;
1892   PetscErrorCode ierr;
1893   PetscBLASInt   one = 1,tnz;
1894 
1895   PetscFunctionBegin;
1896   ierr = PetscBLASIntCast(totalnz,&tnz);CHKERRQ(ierr);
1897   BLASscal_(&tnz,&oalpha,a->a,&one);
1898   ierr = PetscLogFlops(totalnz);CHKERRQ(ierr);
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 #undef __FUNCT__
1903 #define __FUNCT__ "MatNorm_SeqBAIJ"
1904 PetscErrorCode MatNorm_SeqBAIJ(Mat A,NormType type,PetscReal *norm)
1905 {
1906   PetscErrorCode ierr;
1907   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1908   MatScalar      *v = a->a;
1909   PetscReal      sum = 0.0;
1910   PetscInt       i,j,k,bs=A->rmap->bs,nz=a->nz,bs2=a->bs2,k1;
1911 
1912   PetscFunctionBegin;
1913   if (type == NORM_FROBENIUS) {
1914     for (i=0; i< bs2*nz; i++) {
1915       sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1916     }
1917     *norm = PetscSqrtReal(sum);
1918   } else if (type == NORM_1) { /* maximum column sum */
1919     PetscReal *tmp;
1920     PetscInt  *bcol = a->j;
1921     ierr = PetscMalloc((A->cmap->n+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1922     ierr = PetscMemzero(tmp,A->cmap->n*sizeof(PetscReal));CHKERRQ(ierr);
1923     for (i=0; i<nz; i++) {
1924       for (j=0; j<bs; j++) {
1925         k1 = bs*(*bcol) + j; /* column index */
1926         for (k=0; k<bs; k++) {
1927           tmp[k1] += PetscAbsScalar(*v); v++;
1928         }
1929       }
1930       bcol++;
1931     }
1932     *norm = 0.0;
1933     for (j=0; j<A->cmap->n; j++) {
1934       if (tmp[j] > *norm) *norm = tmp[j];
1935     }
1936     ierr = PetscFree(tmp);CHKERRQ(ierr);
1937   } else if (type == NORM_INFINITY) { /* maximum row sum */
1938     *norm = 0.0;
1939     for (k=0; k<bs; k++) {
1940       for (j=0; j<a->mbs; j++) {
1941         v = a->a + bs2*a->i[j] + k;
1942         sum = 0.0;
1943         for (i=0; i<a->i[j+1]-a->i[j]; i++) {
1944           for (k1=0; k1<bs; k1++) {
1945             sum += PetscAbsScalar(*v);
1946             v   += bs;
1947           }
1948         }
1949         if (sum > *norm) *norm = sum;
1950       }
1951     }
1952   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
1953   PetscFunctionReturn(0);
1954 }
1955 
1956 
1957 #undef __FUNCT__
1958 #define __FUNCT__ "MatEqual_SeqBAIJ"
1959 PetscErrorCode MatEqual_SeqBAIJ(Mat A,Mat B,PetscBool * flg)
1960 {
1961   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data,*b = (Mat_SeqBAIJ *)B->data;
1962   PetscErrorCode ierr;
1963 
1964   PetscFunctionBegin;
1965   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1966   if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1967     *flg = PETSC_FALSE;
1968     PetscFunctionReturn(0);
1969   }
1970 
1971   /* if the a->i are the same */
1972   ierr = PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1973   if (!*flg) {
1974     PetscFunctionReturn(0);
1975   }
1976 
1977   /* if a->j are the same */
1978   ierr = PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);CHKERRQ(ierr);
1979   if (!*flg) {
1980     PetscFunctionReturn(0);
1981   }
1982   /* if a->a are the same */
1983   ierr = PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(B->rmap->bs)*sizeof(PetscScalar),flg);CHKERRQ(ierr);
1984   PetscFunctionReturn(0);
1985 
1986 }
1987 
1988 #undef __FUNCT__
1989 #define __FUNCT__ "MatGetDiagonal_SeqBAIJ"
1990 PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat A,Vec v)
1991 {
1992   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1993   PetscErrorCode ierr;
1994   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1995   PetscScalar    *x,zero = 0.0;
1996   MatScalar      *aa,*aa_j;
1997 
1998   PetscFunctionBegin;
1999   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2000   bs   = A->rmap->bs;
2001   aa   = a->a;
2002   ai   = a->i;
2003   aj   = a->j;
2004   ambs = a->mbs;
2005   bs2  = a->bs2;
2006 
2007   ierr = VecSet(v,zero);CHKERRQ(ierr);
2008   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2009   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2010   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2011   for (i=0; i<ambs; i++) {
2012     for (j=ai[i]; j<ai[i+1]; j++) {
2013       if (aj[j] == i) {
2014         row  = i*bs;
2015         aa_j = aa+j*bs2;
2016         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
2017         break;
2018       }
2019     }
2020   }
2021   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2022   PetscFunctionReturn(0);
2023 }
2024 
2025 #undef __FUNCT__
2026 #define __FUNCT__ "MatDiagonalScale_SeqBAIJ"
2027 PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr)
2028 {
2029   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
2030   const PetscScalar *l,*r,*li,*ri;
2031   PetscScalar       x;
2032   MatScalar         *aa, *v;
2033   PetscErrorCode    ierr;
2034   PetscInt          i,j,k,lm,rn,M,m,n,mbs,tmp,bs,bs2,iai;
2035   const PetscInt    *ai,*aj;
2036 
2037   PetscFunctionBegin;
2038   ai  = a->i;
2039   aj  = a->j;
2040   aa  = a->a;
2041   m   = A->rmap->n;
2042   n   = A->cmap->n;
2043   bs  = A->rmap->bs;
2044   mbs = a->mbs;
2045   bs2 = a->bs2;
2046   if (ll) {
2047     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
2048     ierr = VecGetLocalSize(ll,&lm);CHKERRQ(ierr);
2049     if (lm != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
2050     for (i=0; i<mbs; i++) { /* for each block row */
2051       M  = ai[i+1] - ai[i];
2052       li = l + i*bs;
2053       v  = aa + bs2*ai[i];
2054       for (j=0; j<M; j++) { /* for each block */
2055         for (k=0; k<bs2; k++) {
2056           (*v++) *= li[k%bs];
2057         }
2058       }
2059     }
2060     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
2061     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
2062   }
2063 
2064   if (rr) {
2065     ierr = VecGetArrayRead(rr,&r);CHKERRQ(ierr);
2066     ierr = VecGetLocalSize(rr,&rn);CHKERRQ(ierr);
2067     if (rn != n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector wrong length");
2068     for (i=0; i<mbs; i++) { /* for each block row */
2069       iai = ai[i];
2070       M   = ai[i+1] - iai;
2071       v   = aa + bs2*iai;
2072       for (j=0; j<M; j++) { /* for each block */
2073         ri = r + bs*aj[iai+j];
2074         for (k=0; k<bs; k++) {
2075           x = ri[k];
2076           for (tmp=0; tmp<bs; tmp++) v[tmp] *= x;
2077           v += bs;
2078         }
2079       }
2080     }
2081     ierr = VecRestoreArrayRead(rr,&r);CHKERRQ(ierr);
2082     ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
2083   }
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 
2088 #undef __FUNCT__
2089 #define __FUNCT__ "MatGetInfo_SeqBAIJ"
2090 PetscErrorCode MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info)
2091 {
2092   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2093 
2094   PetscFunctionBegin;
2095   info->block_size     = a->bs2;
2096   info->nz_allocated   = a->bs2*a->maxnz;
2097   info->nz_used        = a->bs2*a->nz;
2098   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
2099   info->assemblies   = A->num_ass;
2100   info->mallocs      = A->info.mallocs;
2101   info->memory       = ((PetscObject)A)->mem;
2102   if (A->factortype) {
2103     info->fill_ratio_given  = A->info.fill_ratio_given;
2104     info->fill_ratio_needed = A->info.fill_ratio_needed;
2105     info->factor_mallocs    = A->info.factor_mallocs;
2106   } else {
2107     info->fill_ratio_given  = 0;
2108     info->fill_ratio_needed = 0;
2109     info->factor_mallocs    = 0;
2110   }
2111   PetscFunctionReturn(0);
2112 }
2113 
2114 
2115 #undef __FUNCT__
2116 #define __FUNCT__ "MatZeroEntries_SeqBAIJ"
2117 PetscErrorCode MatZeroEntries_SeqBAIJ(Mat A)
2118 {
2119   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2120   PetscErrorCode ierr;
2121 
2122   PetscFunctionBegin;
2123   ierr = PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));CHKERRQ(ierr);
2124   PetscFunctionReturn(0);
2125 }
2126