xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 65f8aed5f7eaa1e2ef2ddeffe666264e0669c876)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
20 
21 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
22 {
23   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
24   PetscErrorCode ierr;
25   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
26   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
27   PetscReal      shift = 0.0;
28   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
29 
30   PetscFunctionBegin;
31   allowzeropivot = PetscNot(A->erroriffailure);
32 
33   if (a->idiagvalid) {
34     if (values) *values = a->idiag;
35     PetscFunctionReturn(0);
36   }
37   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
38   diag_offset = a->diag;
39   if (!a->idiag) {
40     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
41     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
42   }
43   diag  = a->idiag;
44   if (values) *values = a->idiag;
45   /* factor and invert each block */
46   switch (bs) {
47   case 1:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 1*diag_offset[i];
50       diag[0]  = odiag[0];
51 
52       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
53         if (allowzeropivot) {
54           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
55           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
56           A->factorerror_zeropivot_row   = i;
57           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
58         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
59       }
60 
61       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
62       diag    += 1;
63     }
64     break;
65   case 2:
66     for (i=0; i<mbs; i++) {
67       odiag    = v + 4*diag_offset[i];
68       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
70       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
71       diag    += 4;
72     }
73     break;
74   case 3:
75     for (i=0; i<mbs; i++) {
76       odiag    = v + 9*diag_offset[i];
77       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79       diag[8]  = odiag[8];
80       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
81       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
82       diag    += 9;
83     }
84     break;
85   case 4:
86     for (i=0; i<mbs; i++) {
87       odiag  = v + 16*diag_offset[i];
88       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
89       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
90       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
91       diag  += 16;
92     }
93     break;
94   case 5:
95     for (i=0; i<mbs; i++) {
96       odiag  = v + 25*diag_offset[i];
97       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
98       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
99       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
100       diag  += 25;
101     }
102     break;
103   case 6:
104     for (i=0; i<mbs; i++) {
105       odiag  = v + 36*diag_offset[i];
106       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
107       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
108       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
109       diag  += 36;
110     }
111     break;
112   case 7:
113     for (i=0; i<mbs; i++) {
114       odiag  = v + 49*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
117       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
118       diag  += 49;
119     }
120     break;
121   default:
122     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
123     for (i=0; i<mbs; i++) {
124       odiag  = v + bs2*diag_offset[i];
125       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
126       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
127       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128       diag  += bs2;
129     }
130     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
131   }
132   a->idiagvalid = PETSC_TRUE;
133   PetscFunctionReturn(0);
134 }
135 
136 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
137 {
138   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
139   PetscScalar       *x,*work,*w,*workt,*t;
140   const MatScalar   *v,*aa = a->a, *idiag;
141   const PetscScalar *b,*xb;
142   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
143   PetscErrorCode    ierr;
144   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
145   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
146 
147   PetscFunctionBegin;
148   its = its*lits;
149   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
150   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
151   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
152   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
153   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
154 
155   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
156 
157   if (!m) PetscFunctionReturn(0);
158   diag  = a->diag;
159   idiag = a->idiag;
160   k    = PetscMax(A->rmap->n,A->cmap->n);
161   if (!a->mult_work) {
162     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
163   }
164   if (!a->sor_workt) {
165     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
166   }
167   if (!a->sor_work) {
168     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
169   }
170   work = a->mult_work;
171   t    = a->sor_workt;
172   w    = a->sor_work;
173 
174   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
175   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
176 
177   if (flag & SOR_ZERO_INITIAL_GUESS) {
178     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
179       switch (bs) {
180       case 1:
181         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
182         t[0] = b[0];
183         i2     = 1;
184         idiag += 1;
185         for (i=1; i<m; i++) {
186           v  = aa + ai[i];
187           vi = aj + ai[i];
188           nz = diag[i] - ai[i];
189           s[0] = b[i2];
190           for (j=0; j<nz; j++) {
191             xw[0] = x[vi[j]];
192             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
193           }
194           t[i2] = s[0];
195           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
196           x[i2]  = xw[0];
197           idiag += 1;
198           i2    += 1;
199         }
200         break;
201       case 2:
202         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
203         t[0] = b[0]; t[1] = b[1];
204         i2     = 2;
205         idiag += 4;
206         for (i=1; i<m; i++) {
207           v  = aa + 4*ai[i];
208           vi = aj + ai[i];
209           nz = diag[i] - ai[i];
210           s[0] = b[i2]; s[1] = b[i2+1];
211           for (j=0; j<nz; j++) {
212             idx = 2*vi[j];
213             it  = 4*j;
214             xw[0] = x[idx]; xw[1] = x[1+idx];
215             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
216           }
217           t[i2] = s[0]; t[i2+1] = s[1];
218           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
219           x[i2]   = xw[0]; x[i2+1] = xw[1];
220           idiag  += 4;
221           i2     += 2;
222         }
223         break;
224       case 3:
225         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
226         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
227         i2     = 3;
228         idiag += 9;
229         for (i=1; i<m; i++) {
230           v  = aa + 9*ai[i];
231           vi = aj + ai[i];
232           nz = diag[i] - ai[i];
233           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
234           while (nz--) {
235             idx = 3*(*vi++);
236             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
237             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
238             v  += 9;
239           }
240           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
241           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
242           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
243           idiag  += 9;
244           i2     += 3;
245         }
246         break;
247       case 4:
248         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
249         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
250         i2     = 4;
251         idiag += 16;
252         for (i=1; i<m; i++) {
253           v  = aa + 16*ai[i];
254           vi = aj + ai[i];
255           nz = diag[i] - ai[i];
256           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
257           while (nz--) {
258             idx = 4*(*vi++);
259             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
260             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
261             v  += 16;
262           }
263           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
264           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
265           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
266           idiag  += 16;
267           i2     += 4;
268         }
269         break;
270       case 5:
271         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
272         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
273         i2     = 5;
274         idiag += 25;
275         for (i=1; i<m; i++) {
276           v  = aa + 25*ai[i];
277           vi = aj + ai[i];
278           nz = diag[i] - ai[i];
279           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
280           while (nz--) {
281             idx = 5*(*vi++);
282             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
283             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
284             v  += 25;
285           }
286           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
287           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
288           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
289           idiag  += 25;
290           i2     += 5;
291         }
292         break;
293       case 6:
294         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
295         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
296         i2     = 6;
297         idiag += 36;
298         for (i=1; i<m; i++) {
299           v  = aa + 36*ai[i];
300           vi = aj + ai[i];
301           nz = diag[i] - ai[i];
302           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
303           while (nz--) {
304             idx = 6*(*vi++);
305             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
306             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
307             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
308             v  += 36;
309           }
310           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
311           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
312           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
313           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
314           idiag  += 36;
315           i2     += 6;
316         }
317         break;
318       case 7:
319         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
320         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
321         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
322         i2     = 7;
323         idiag += 49;
324         for (i=1; i<m; i++) {
325           v  = aa + 49*ai[i];
326           vi = aj + ai[i];
327           nz = diag[i] - ai[i];
328           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
329           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
330           while (nz--) {
331             idx = 7*(*vi++);
332             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
333             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
334             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
335             v  += 49;
336           }
337           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
338           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
339           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
340           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
341           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
342           idiag  += 49;
343           i2     += 7;
344         }
345         break;
346       default:
347         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
348         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
349         i2     = bs;
350         idiag += bs2;
351         for (i=1; i<m; i++) {
352           v  = aa + bs2*ai[i];
353           vi = aj + ai[i];
354           nz = diag[i] - ai[i];
355 
356           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
357           /* copy all rows of x that are needed into contiguous space */
358           workt = work;
359           for (j=0; j<nz; j++) {
360             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
361             workt += bs;
362           }
363           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
364           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
365           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
366 
367           idiag += bs2;
368           i2    += bs;
369         }
370         break;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
374       xb = t;
375     }
376     else xb = b;
377     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
378       idiag = a->idiag+bs2*(a->mbs-1);
379       i2 = bs * (m-1);
380       switch (bs) {
381       case 1:
382         s[0]  = xb[i2];
383         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
384         x[i2] = xw[0];
385         i2   -= 1;
386         for (i=m-2; i>=0; i--) {
387           v  = aa + (diag[i]+1);
388           vi = aj + diag[i] + 1;
389           nz = ai[i+1] - diag[i] - 1;
390           s[0] = xb[i2];
391           for (j=0; j<nz; j++) {
392             xw[0] = x[vi[j]];
393             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
394           }
395           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
396           x[i2]  = xw[0];
397           idiag -= 1;
398           i2    -= 1;
399         }
400         break;
401       case 2:
402         s[0]  = xb[i2]; s[1] = xb[i2+1];
403         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
404         x[i2] = xw[0]; x[i2+1] = xw[1];
405         i2    -= 2;
406         idiag -= 4;
407         for (i=m-2; i>=0; i--) {
408           v  = aa + 4*(diag[i] + 1);
409           vi = aj + diag[i] + 1;
410           nz = ai[i+1] - diag[i] - 1;
411           s[0] = xb[i2]; s[1] = xb[i2+1];
412           for (j=0; j<nz; j++) {
413             idx = 2*vi[j];
414             it  = 4*j;
415             xw[0] = x[idx]; xw[1] = x[1+idx];
416             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
417           }
418           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
419           x[i2]   = xw[0]; x[i2+1] = xw[1];
420           idiag  -= 4;
421           i2     -= 2;
422         }
423         break;
424       case 3:
425         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
426         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
427         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
428         i2    -= 3;
429         idiag -= 9;
430         for (i=m-2; i>=0; i--) {
431           v  = aa + 9*(diag[i]+1);
432           vi = aj + diag[i] + 1;
433           nz = ai[i+1] - diag[i] - 1;
434           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
435           while (nz--) {
436             idx = 3*(*vi++);
437             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
438             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
439             v  += 9;
440           }
441           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
442           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
443           idiag  -= 9;
444           i2     -= 3;
445         }
446         break;
447       case 4:
448         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
449         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
450         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
451         i2    -= 4;
452         idiag -= 16;
453         for (i=m-2; i>=0; i--) {
454           v  = aa + 16*(diag[i]+1);
455           vi = aj + diag[i] + 1;
456           nz = ai[i+1] - diag[i] - 1;
457           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
458           while (nz--) {
459             idx = 4*(*vi++);
460             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
461             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
462             v  += 16;
463           }
464           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
465           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
466           idiag  -= 16;
467           i2     -= 4;
468         }
469         break;
470       case 5:
471         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
472         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
473         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
474         i2    -= 5;
475         idiag -= 25;
476         for (i=m-2; i>=0; i--) {
477           v  = aa + 25*(diag[i]+1);
478           vi = aj + diag[i] + 1;
479           nz = ai[i+1] - diag[i] - 1;
480           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
481           while (nz--) {
482             idx = 5*(*vi++);
483             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
484             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
485             v  += 25;
486           }
487           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
488           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
489           idiag  -= 25;
490           i2     -= 5;
491         }
492         break;
493       case 6:
494         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
495         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
496         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
497         i2    -= 6;
498         idiag -= 36;
499         for (i=m-2; i>=0; i--) {
500           v  = aa + 36*(diag[i]+1);
501           vi = aj + diag[i] + 1;
502           nz = ai[i+1] - diag[i] - 1;
503           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
504           while (nz--) {
505             idx = 6*(*vi++);
506             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
507             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
508             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
509             v  += 36;
510           }
511           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
512           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
513           idiag  -= 36;
514           i2     -= 6;
515         }
516         break;
517       case 7:
518         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
519         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
520         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
521         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
522         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
523         i2    -= 7;
524         idiag -= 49;
525         for (i=m-2; i>=0; i--) {
526           v  = aa + 49*(diag[i]+1);
527           vi = aj + diag[i] + 1;
528           nz = ai[i+1] - diag[i] - 1;
529           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
530           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
531           while (nz--) {
532             idx = 7*(*vi++);
533             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
534             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
535             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
536             v  += 49;
537           }
538           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
539           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
540           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
541           idiag  -= 49;
542           i2     -= 7;
543         }
544         break;
545       default:
546         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
547         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
548         i2    -= bs;
549         idiag -= bs2;
550         for (i=m-2; i>=0; i--) {
551           v  = aa + bs2*(diag[i]+1);
552           vi = aj + diag[i] + 1;
553           nz = ai[i+1] - diag[i] - 1;
554 
555           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
556           /* copy all rows of x that are needed into contiguous space */
557           workt = work;
558           for (j=0; j<nz; j++) {
559             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
560             workt += bs;
561           }
562           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
563           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
564 
565           idiag -= bs2;
566           i2    -= bs;
567         }
568         break;
569       }
570       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
571     }
572     its--;
573   }
574   while (its--) {
575     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
576       idiag = a->idiag;
577       i2 = 0;
578       switch (bs) {
579       case 1:
580         for (i=0; i<m; i++) {
581           v  = aa + ai[i];
582           vi = aj + ai[i];
583           nz = ai[i+1] - ai[i];
584           s[0] = b[i2];
585           for (j=0; j<nz; j++) {
586             xw[0] = x[vi[j]];
587             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
588           }
589           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
590           x[i2] += xw[0];
591           idiag += 1;
592           i2    += 1;
593         }
594         break;
595       case 2:
596         for (i=0; i<m; i++) {
597           v  = aa + 4*ai[i];
598           vi = aj + ai[i];
599           nz = ai[i+1] - ai[i];
600           s[0] = b[i2]; s[1] = b[i2+1];
601           for (j=0; j<nz; j++) {
602             idx = 2*vi[j];
603             it  = 4*j;
604             xw[0] = x[idx]; xw[1] = x[1+idx];
605             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
606           }
607           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
608           x[i2]  += xw[0]; x[i2+1] += xw[1];
609           idiag  += 4;
610           i2     += 2;
611         }
612         break;
613       case 3:
614         for (i=0; i<m; i++) {
615           v  = aa + 9*ai[i];
616           vi = aj + ai[i];
617           nz = ai[i+1] - ai[i];
618           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
619           while (nz--) {
620             idx = 3*(*vi++);
621             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
622             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
623             v  += 9;
624           }
625           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
626           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
627           idiag  += 9;
628           i2     += 3;
629         }
630         break;
631       case 4:
632         for (i=0; i<m; i++) {
633           v  = aa + 16*ai[i];
634           vi = aj + ai[i];
635           nz = ai[i+1] - ai[i];
636           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
637           while (nz--) {
638             idx = 4*(*vi++);
639             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
640             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
641             v  += 16;
642           }
643           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
644           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
645           idiag  += 16;
646           i2     += 4;
647         }
648         break;
649       case 5:
650         for (i=0; i<m; i++) {
651           v  = aa + 25*ai[i];
652           vi = aj + ai[i];
653           nz = ai[i+1] - ai[i];
654           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
655           while (nz--) {
656             idx = 5*(*vi++);
657             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
658             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
659             v  += 25;
660           }
661           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
662           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
663           idiag  += 25;
664           i2     += 5;
665         }
666         break;
667       case 6:
668         for (i=0; i<m; i++) {
669           v  = aa + 36*ai[i];
670           vi = aj + ai[i];
671           nz = ai[i+1] - ai[i];
672           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
673           while (nz--) {
674             idx = 6*(*vi++);
675             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
676             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
677             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
678             v  += 36;
679           }
680           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
681           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
682           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
683           idiag  += 36;
684           i2     += 6;
685         }
686         break;
687       case 7:
688         for (i=0; i<m; i++) {
689           v  = aa + 49*ai[i];
690           vi = aj + ai[i];
691           nz = ai[i+1] - ai[i];
692           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
693           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
694           while (nz--) {
695             idx = 7*(*vi++);
696             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
697             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
698             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
699             v  += 49;
700           }
701           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
702           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
703           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
704           idiag  += 49;
705           i2     += 7;
706         }
707         break;
708       default:
709         for (i=0; i<m; i++) {
710           v  = aa + bs2*ai[i];
711           vi = aj + ai[i];
712           nz = ai[i+1] - ai[i];
713 
714           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
715           /* copy all rows of x that are needed into contiguous space */
716           workt = work;
717           for (j=0; j<nz; j++) {
718             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
719             workt += bs;
720           }
721           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
722           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
723 
724           idiag += bs2;
725           i2    += bs;
726         }
727         break;
728       }
729       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
730     }
731     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
732       idiag = a->idiag+bs2*(a->mbs-1);
733       i2 = bs * (m-1);
734       switch (bs) {
735       case 1:
736         for (i=m-1; i>=0; i--) {
737           v  = aa + ai[i];
738           vi = aj + ai[i];
739           nz = ai[i+1] - ai[i];
740           s[0] = b[i2];
741           for (j=0; j<nz; j++) {
742             xw[0] = x[vi[j]];
743             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
744           }
745           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
746           x[i2] += xw[0];
747           idiag -= 1;
748           i2    -= 1;
749         }
750         break;
751       case 2:
752         for (i=m-1; i>=0; i--) {
753           v  = aa + 4*ai[i];
754           vi = aj + ai[i];
755           nz = ai[i+1] - ai[i];
756           s[0] = b[i2]; s[1] = b[i2+1];
757           for (j=0; j<nz; j++) {
758             idx = 2*vi[j];
759             it  = 4*j;
760             xw[0] = x[idx]; xw[1] = x[1+idx];
761             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
762           }
763           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
764           x[i2]  += xw[0]; x[i2+1] += xw[1];
765           idiag  -= 4;
766           i2     -= 2;
767         }
768         break;
769       case 3:
770         for (i=m-1; i>=0; i--) {
771           v  = aa + 9*ai[i];
772           vi = aj + ai[i];
773           nz = ai[i+1] - ai[i];
774           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
775           while (nz--) {
776             idx = 3*(*vi++);
777             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
778             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
779             v  += 9;
780           }
781           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
782           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
783           idiag  -= 9;
784           i2     -= 3;
785         }
786         break;
787       case 4:
788         for (i=m-1; i>=0; i--) {
789           v  = aa + 16*ai[i];
790           vi = aj + ai[i];
791           nz = ai[i+1] - ai[i];
792           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
793           while (nz--) {
794             idx = 4*(*vi++);
795             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
796             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
797             v  += 16;
798           }
799           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
800           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
801           idiag  -= 16;
802           i2     -= 4;
803         }
804         break;
805       case 5:
806         for (i=m-1; i>=0; i--) {
807           v  = aa + 25*ai[i];
808           vi = aj + ai[i];
809           nz = ai[i+1] - ai[i];
810           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
811           while (nz--) {
812             idx = 5*(*vi++);
813             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
814             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
815             v  += 25;
816           }
817           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
818           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
819           idiag  -= 25;
820           i2     -= 5;
821         }
822         break;
823       case 6:
824         for (i=m-1; i>=0; i--) {
825           v  = aa + 36*ai[i];
826           vi = aj + ai[i];
827           nz = ai[i+1] - ai[i];
828           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
829           while (nz--) {
830             idx = 6*(*vi++);
831             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
832             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
833             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
834             v  += 36;
835           }
836           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
837           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
838           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
839           idiag  -= 36;
840           i2     -= 6;
841         }
842         break;
843       case 7:
844         for (i=m-1; i>=0; i--) {
845           v  = aa + 49*ai[i];
846           vi = aj + ai[i];
847           nz = ai[i+1] - ai[i];
848           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
849           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
850           while (nz--) {
851             idx = 7*(*vi++);
852             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
853             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
854             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
855             v  += 49;
856           }
857           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
858           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
859           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
860           idiag  -= 49;
861           i2     -= 7;
862         }
863         break;
864       default:
865         for (i=m-1; i>=0; i--) {
866           v  = aa + bs2*ai[i];
867           vi = aj + ai[i];
868           nz = ai[i+1] - ai[i];
869 
870           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
871           /* copy all rows of x that are needed into contiguous space */
872           workt = work;
873           for (j=0; j<nz; j++) {
874             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
875             workt += bs;
876           }
877           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
878           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
879 
880           idiag -= bs2;
881           i2    -= bs;
882         }
883         break;
884       }
885       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
886     }
887   }
888   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
889   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
890   PetscFunctionReturn(0);
891 }
892 
893 
894 /*
895     Special version for direct calls from Fortran (Used in PETSc-fun3d)
896 */
897 #if defined(PETSC_HAVE_FORTRAN_CAPS)
898 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
899 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
900 #define matsetvaluesblocked4_ matsetvaluesblocked4
901 #endif
902 
903 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
904 {
905   Mat               A  = *AA;
906   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
907   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
908   PetscInt          *ai    =a->i,*ailen=a->ilen;
909   PetscInt          *aj    =a->j,stepval,lastcol = -1;
910   const PetscScalar *value = v;
911   MatScalar         *ap,*aa = a->a,*bap;
912 
913   PetscFunctionBegin;
914   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
915   stepval = (n-1)*4;
916   for (k=0; k<m; k++) { /* loop over added rows */
917     row  = im[k];
918     rp   = aj + ai[row];
919     ap   = aa + 16*ai[row];
920     nrow = ailen[row];
921     low  = 0;
922     high = nrow;
923     for (l=0; l<n; l++) { /* loop over added columns */
924       col = in[l];
925       if (col <= lastcol)  low = 0;
926       else                high = nrow;
927       lastcol = col;
928       value   = v + k*(stepval+4 + l)*4;
929       while (high-low > 7) {
930         t = (low+high)/2;
931         if (rp[t] > col) high = t;
932         else             low  = t;
933       }
934       for (i=low; i<high; i++) {
935         if (rp[i] > col) break;
936         if (rp[i] == col) {
937           bap = ap +  16*i;
938           for (ii=0; ii<4; ii++,value+=stepval) {
939             for (jj=ii; jj<16; jj+=4) {
940               bap[jj] += *value++;
941             }
942           }
943           goto noinsert2;
944         }
945       }
946       N = nrow++ - 1;
947       high++; /* added new column index thus must search to one higher than before */
948       /* shift up all the later entries in this row */
949       for (ii=N; ii>=i; ii--) {
950         rp[ii+1] = rp[ii];
951         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
952       }
953       if (N >= i) {
954         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
955       }
956       rp[i] = col;
957       bap   = ap +  16*i;
958       for (ii=0; ii<4; ii++,value+=stepval) {
959         for (jj=ii; jj<16; jj+=4) {
960           bap[jj] = *value++;
961         }
962       }
963       noinsert2:;
964       low = i;
965     }
966     ailen[row] = nrow;
967   }
968   PetscFunctionReturnVoid();
969 }
970 
971 #if defined(PETSC_HAVE_FORTRAN_CAPS)
972 #define matsetvalues4_ MATSETVALUES4
973 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
974 #define matsetvalues4_ matsetvalues4
975 #endif
976 
977 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
978 {
979   Mat         A  = *AA;
980   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
981   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
982   PetscInt    *ai=a->i,*ailen=a->ilen;
983   PetscInt    *aj=a->j,brow,bcol;
984   PetscInt    ridx,cidx,lastcol = -1;
985   MatScalar   *ap,value,*aa=a->a,*bap;
986 
987   PetscFunctionBegin;
988   for (k=0; k<m; k++) { /* loop over added rows */
989     row  = im[k]; brow = row/4;
990     rp   = aj + ai[brow];
991     ap   = aa + 16*ai[brow];
992     nrow = ailen[brow];
993     low  = 0;
994     high = nrow;
995     for (l=0; l<n; l++) { /* loop over added columns */
996       col   = in[l]; bcol = col/4;
997       ridx  = row % 4; cidx = col % 4;
998       value = v[l + k*n];
999       if (col <= lastcol)  low = 0;
1000       else                high = nrow;
1001       lastcol = col;
1002       while (high-low > 7) {
1003         t = (low+high)/2;
1004         if (rp[t] > bcol) high = t;
1005         else              low  = t;
1006       }
1007       for (i=low; i<high; i++) {
1008         if (rp[i] > bcol) break;
1009         if (rp[i] == bcol) {
1010           bap   = ap +  16*i + 4*cidx + ridx;
1011           *bap += value;
1012           goto noinsert1;
1013         }
1014       }
1015       N = nrow++ - 1;
1016       high++; /* added new column thus must search to one higher than before */
1017       /* shift up all the later entries in this row */
1018       for (ii=N; ii>=i; ii--) {
1019         rp[ii+1] = rp[ii];
1020         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1021       }
1022       if (N>=i) {
1023         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1024       }
1025       rp[i]                    = bcol;
1026       ap[16*i + 4*cidx + ridx] = value;
1027 noinsert1:;
1028       low = i;
1029     }
1030     ailen[brow] = nrow;
1031   }
1032   PetscFunctionReturnVoid();
1033 }
1034 
1035 /*
1036      Checks for missing diagonals
1037 */
1038 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1039 {
1040   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1041   PetscErrorCode ierr;
1042   PetscInt       *diag,*ii = a->i,i;
1043 
1044   PetscFunctionBegin;
1045   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1046   *missing = PETSC_FALSE;
1047   if (A->rmap->n > 0 && !ii) {
1048     *missing = PETSC_TRUE;
1049     if (d) *d = 0;
1050     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
1051   } else {
1052     diag = a->diag;
1053     for (i=0; i<a->mbs; i++) {
1054       if (diag[i] >= ii[i+1]) {
1055         *missing = PETSC_TRUE;
1056         if (d) *d = i;
1057         ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1058         break;
1059       }
1060     }
1061   }
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1066 {
1067   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1068   PetscErrorCode ierr;
1069   PetscInt       i,j,m = a->mbs;
1070 
1071   PetscFunctionBegin;
1072   if (!a->diag) {
1073     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1074     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1075     a->free_diag = PETSC_TRUE;
1076   }
1077   for (i=0; i<m; i++) {
1078     a->diag[i] = a->i[i+1];
1079     for (j=a->i[i]; j<a->i[i+1]; j++) {
1080       if (a->j[j] == i) {
1081         a->diag[i] = j;
1082         break;
1083       }
1084     }
1085   }
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 
1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1091 {
1092   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1093   PetscErrorCode ierr;
1094   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1095   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1096 
1097   PetscFunctionBegin;
1098   *nn = n;
1099   if (!ia) PetscFunctionReturn(0);
1100   if (symmetric) {
1101     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1102     nz   = tia[n];
1103   } else {
1104     tia = a->i; tja = a->j;
1105   }
1106 
1107   if (!blockcompressed && bs > 1) {
1108     (*nn) *= bs;
1109     /* malloc & create the natural set of indices */
1110     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1111     if (n) {
1112       (*ia)[0] = oshift;
1113       for (j=1; j<bs; j++) {
1114         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1115       }
1116     }
1117 
1118     for (i=1; i<n; i++) {
1119       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1120       for (j=1; j<bs; j++) {
1121         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1122       }
1123     }
1124     if (n) {
1125       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1126     }
1127 
1128     if (inja) {
1129       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1130       cnt = 0;
1131       for (i=0; i<n; i++) {
1132         for (j=0; j<bs; j++) {
1133           for (k=tia[i]; k<tia[i+1]; k++) {
1134             for (l=0; l<bs; l++) {
1135               (*ja)[cnt++] = bs*tja[k] + l;
1136             }
1137           }
1138         }
1139       }
1140     }
1141 
1142     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1143       ierr = PetscFree(tia);CHKERRQ(ierr);
1144       ierr = PetscFree(tja);CHKERRQ(ierr);
1145     }
1146   } else if (oshift == 1) {
1147     if (symmetric) {
1148       nz = tia[A->rmap->n/bs];
1149       /*  add 1 to i and j indices */
1150       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1151       *ia = tia;
1152       if (ja) {
1153         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1154         *ja = tja;
1155       }
1156     } else {
1157       nz = a->i[A->rmap->n/bs];
1158       /* malloc space and  add 1 to i and j indices */
1159       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1160       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1161       if (ja) {
1162         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1163         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1164       }
1165     }
1166   } else {
1167     *ia = tia;
1168     if (ja) *ja = tja;
1169   }
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1174 {
1175   PetscErrorCode ierr;
1176 
1177   PetscFunctionBegin;
1178   if (!ia) PetscFunctionReturn(0);
1179   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1180     ierr = PetscFree(*ia);CHKERRQ(ierr);
1181     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1182   }
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1187 {
1188   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1189   PetscErrorCode ierr;
1190 
1191   PetscFunctionBegin;
1192 #if defined(PETSC_USE_LOG)
1193   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1194 #endif
1195   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1196   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1197   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1198   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1199   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1200   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1201   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1202   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1203   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1204   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1205   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1206   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1207   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1208 
1209   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1210   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1211   ierr = PetscFree(A->data);CHKERRQ(ierr);
1212 
1213   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1214   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1224 #if defined(PETSC_HAVE_HYPRE)
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
1226 #endif
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr);
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1233 {
1234   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1235   PetscErrorCode ierr;
1236 
1237   PetscFunctionBegin;
1238   switch (op) {
1239   case MAT_ROW_ORIENTED:
1240     a->roworiented = flg;
1241     break;
1242   case MAT_KEEP_NONZERO_PATTERN:
1243     a->keepnonzeropattern = flg;
1244     break;
1245   case MAT_NEW_NONZERO_LOCATIONS:
1246     a->nonew = (flg ? 0 : 1);
1247     break;
1248   case MAT_NEW_NONZERO_LOCATION_ERR:
1249     a->nonew = (flg ? -1 : 0);
1250     break;
1251   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1252     a->nonew = (flg ? -2 : 0);
1253     break;
1254   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1255     a->nounused = (flg ? -1 : 0);
1256     break;
1257   case MAT_NEW_DIAGONALS:
1258   case MAT_IGNORE_OFF_PROC_ENTRIES:
1259   case MAT_USE_HASH_TABLE:
1260     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1261     break;
1262   case MAT_SPD:
1263   case MAT_SYMMETRIC:
1264   case MAT_STRUCTURALLY_SYMMETRIC:
1265   case MAT_HERMITIAN:
1266   case MAT_SYMMETRY_ETERNAL:
1267   case MAT_SUBMAT_SINGLEIS:
1268   case MAT_STRUCTURE_ONLY:
1269     /* These options are handled directly by MatSetOption() */
1270     break;
1271   default:
1272     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1273   }
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1278 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1279 {
1280   PetscErrorCode ierr;
1281   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1282   MatScalar      *aa_i;
1283   PetscScalar    *v_i;
1284 
1285   PetscFunctionBegin;
1286   bs  = A->rmap->bs;
1287   bs2 = bs*bs;
1288   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1289 
1290   bn  = row/bs;   /* Block number */
1291   bp  = row % bs; /* Block Position */
1292   M   = ai[bn+1] - ai[bn];
1293   *nz = bs*M;
1294 
1295   if (v) {
1296     *v = 0;
1297     if (*nz) {
1298       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1299       for (i=0; i<M; i++) { /* for each block in the block row */
1300         v_i  = *v + i*bs;
1301         aa_i = aa + bs2*(ai[bn] + i);
1302         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1303       }
1304     }
1305   }
1306 
1307   if (idx) {
1308     *idx = 0;
1309     if (*nz) {
1310       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1311       for (i=0; i<M; i++) { /* for each block in the block row */
1312         idx_i = *idx + i*bs;
1313         itmp  = bs*aj[ai[bn] + i];
1314         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1315       }
1316     }
1317   }
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1322 {
1323   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1324   PetscErrorCode ierr;
1325 
1326   PetscFunctionBegin;
1327   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1332 {
1333   PetscErrorCode ierr;
1334 
1335   PetscFunctionBegin;
1336   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1337   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1342 {
1343   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1344   Mat            C;
1345   PetscErrorCode ierr;
1346   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1347   PetscInt       *rows,*cols,bs2=a->bs2;
1348   MatScalar      *array;
1349 
1350   PetscFunctionBegin;
1351   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1352     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1353 
1354     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1355     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1356     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1357     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1358     ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr);
1359     ierr = PetscFree(col);CHKERRQ(ierr);
1360   } else {
1361     C = *B;
1362   }
1363 
1364   array = a->a;
1365   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1366   for (i=0; i<mbs; i++) {
1367     cols[0] = i*bs;
1368     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1369     len = ai[i+1] - ai[i];
1370     for (j=0; j<len; j++) {
1371       rows[0] = (*aj++)*bs;
1372       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1373       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1374       array += bs2;
1375     }
1376   }
1377   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1378 
1379   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1380   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1381 
1382   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1383     *B = C;
1384   } else {
1385     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1386   }
1387   PetscFunctionReturn(0);
1388 }
1389 
1390 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1391 {
1392   PetscErrorCode ierr;
1393   Mat            Btrans;
1394 
1395   PetscFunctionBegin;
1396   *f   = PETSC_FALSE;
1397   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1398   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1399   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1404 {
1405   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1406   PetscErrorCode ierr;
1407   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1408   int            fd;
1409   PetscScalar    *aa;
1410   FILE           *file;
1411 
1412   PetscFunctionBegin;
1413   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1414   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1415   col_lens[0] = MAT_FILE_CLASSID;
1416 
1417   col_lens[1] = A->rmap->N;
1418   col_lens[2] = A->cmap->n;
1419   col_lens[3] = a->nz*bs2;
1420 
1421   /* store lengths of each row and write (including header) to file */
1422   count = 0;
1423   for (i=0; i<a->mbs; i++) {
1424     for (j=0; j<bs; j++) {
1425       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1426     }
1427   }
1428   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1429   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1430 
1431   /* store column indices (zero start index) */
1432   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1433   count = 0;
1434   for (i=0; i<a->mbs; i++) {
1435     for (j=0; j<bs; j++) {
1436       for (k=a->i[i]; k<a->i[i+1]; k++) {
1437         for (l=0; l<bs; l++) {
1438           jj[count++] = bs*a->j[k] + l;
1439         }
1440       }
1441     }
1442   }
1443   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1444   ierr = PetscFree(jj);CHKERRQ(ierr);
1445 
1446   /* store nonzero values */
1447   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1448   count = 0;
1449   for (i=0; i<a->mbs; i++) {
1450     for (j=0; j<bs; j++) {
1451       for (k=a->i[i]; k<a->i[i+1]; k++) {
1452         for (l=0; l<bs; l++) {
1453           aa[count++] = a->a[bs2*k + l*bs + j];
1454         }
1455       }
1456     }
1457   }
1458   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1459   ierr = PetscFree(aa);CHKERRQ(ierr);
1460 
1461   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1462   if (file) {
1463     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1464   }
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1469 {
1470   PetscErrorCode ierr;
1471   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1472   PetscInt       i,bs = A->rmap->bs,k;
1473 
1474   PetscFunctionBegin;
1475   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1476   for (i=0; i<a->mbs; i++) {
1477     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1478     for (k=a->i[i]; k<a->i[i+1]; k++) {
1479       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1480     }
1481     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1482   }
1483   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1488 {
1489   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1490   PetscErrorCode    ierr;
1491   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1492   PetscViewerFormat format;
1493 
1494   PetscFunctionBegin;
1495   if (A->structure_only) {
1496     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1497     PetscFunctionReturn(0);
1498   }
1499 
1500   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1501   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1502     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1503   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1504     const char *matname;
1505     Mat        aij;
1506     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1507     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1508     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1509     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1510     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1511   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1512       PetscFunctionReturn(0);
1513   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1514     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1515     for (i=0; i<a->mbs; i++) {
1516       for (j=0; j<bs; j++) {
1517         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1518         for (k=a->i[i]; k<a->i[i+1]; k++) {
1519           for (l=0; l<bs; l++) {
1520 #if defined(PETSC_USE_COMPLEX)
1521             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1522               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1523                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1524             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1525               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1526                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1527             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1528               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1529             }
1530 #else
1531             if (a->a[bs2*k + l*bs + j] != 0.0) {
1532               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1533             }
1534 #endif
1535           }
1536         }
1537         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1538       }
1539     }
1540     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1541   } else {
1542     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1543     for (i=0; i<a->mbs; i++) {
1544       for (j=0; j<bs; j++) {
1545         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1546         for (k=a->i[i]; k<a->i[i+1]; k++) {
1547           for (l=0; l<bs; l++) {
1548 #if defined(PETSC_USE_COMPLEX)
1549             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1550               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1551                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1552             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1553               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1554                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1555             } else {
1556               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1557             }
1558 #else
1559             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1560 #endif
1561           }
1562         }
1563         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1564       }
1565     }
1566     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1567   }
1568   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1569   PetscFunctionReturn(0);
1570 }
1571 
1572 #include <petscdraw.h>
1573 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1574 {
1575   Mat               A = (Mat) Aa;
1576   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1577   PetscErrorCode    ierr;
1578   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1579   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1580   MatScalar         *aa;
1581   PetscViewer       viewer;
1582   PetscViewerFormat format;
1583 
1584   PetscFunctionBegin;
1585   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1586   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1587   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1588 
1589   /* loop over matrix elements drawing boxes */
1590 
1591   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1592     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1593     /* Blue for negative, Cyan for zero and  Red for positive */
1594     color = PETSC_DRAW_BLUE;
1595     for (i=0,row=0; i<mbs; i++,row+=bs) {
1596       for (j=a->i[i]; j<a->i[i+1]; j++) {
1597         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1598         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1599         aa  = a->a + j*bs2;
1600         for (k=0; k<bs; k++) {
1601           for (l=0; l<bs; l++) {
1602             if (PetscRealPart(*aa++) >=  0.) continue;
1603             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1604           }
1605         }
1606       }
1607     }
1608     color = PETSC_DRAW_CYAN;
1609     for (i=0,row=0; i<mbs; i++,row+=bs) {
1610       for (j=a->i[i]; j<a->i[i+1]; j++) {
1611         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1612         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1613         aa  = a->a + j*bs2;
1614         for (k=0; k<bs; k++) {
1615           for (l=0; l<bs; l++) {
1616             if (PetscRealPart(*aa++) != 0.) continue;
1617             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1618           }
1619         }
1620       }
1621     }
1622     color = PETSC_DRAW_RED;
1623     for (i=0,row=0; i<mbs; i++,row+=bs) {
1624       for (j=a->i[i]; j<a->i[i+1]; j++) {
1625         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1626         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1627         aa  = a->a + j*bs2;
1628         for (k=0; k<bs; k++) {
1629           for (l=0; l<bs; l++) {
1630             if (PetscRealPart(*aa++) <= 0.) continue;
1631             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1632           }
1633         }
1634       }
1635     }
1636     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1637   } else {
1638     /* use contour shading to indicate magnitude of values */
1639     /* first determine max of all nonzero values */
1640     PetscReal minv = 0.0, maxv = 0.0;
1641     PetscDraw popup;
1642 
1643     for (i=0; i<a->nz*a->bs2; i++) {
1644       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1645     }
1646     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1647     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1648     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1649 
1650     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1651     for (i=0,row=0; i<mbs; i++,row+=bs) {
1652       for (j=a->i[i]; j<a->i[i+1]; j++) {
1653         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1654         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1655         aa  = a->a + j*bs2;
1656         for (k=0; k<bs; k++) {
1657           for (l=0; l<bs; l++) {
1658             MatScalar v = *aa++;
1659             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1660             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1661           }
1662         }
1663       }
1664     }
1665     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1666   }
1667   PetscFunctionReturn(0);
1668 }
1669 
1670 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1671 {
1672   PetscErrorCode ierr;
1673   PetscReal      xl,yl,xr,yr,w,h;
1674   PetscDraw      draw;
1675   PetscBool      isnull;
1676 
1677   PetscFunctionBegin;
1678   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1679   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1680   if (isnull) PetscFunctionReturn(0);
1681 
1682   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1683   xr  += w;          yr += h;        xl = -w;     yl = -h;
1684   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1685   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1686   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1687   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1688   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1689   PetscFunctionReturn(0);
1690 }
1691 
1692 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1693 {
1694   PetscErrorCode ierr;
1695   PetscBool      iascii,isbinary,isdraw;
1696 
1697   PetscFunctionBegin;
1698   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1699   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1700   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1701   if (iascii) {
1702     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1703   } else if (isbinary) {
1704     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1705   } else if (isdraw) {
1706     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1707   } else {
1708     Mat B;
1709     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1710     ierr = MatView(B,viewer);CHKERRQ(ierr);
1711     ierr = MatDestroy(&B);CHKERRQ(ierr);
1712   }
1713   PetscFunctionReturn(0);
1714 }
1715 
1716 
1717 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1718 {
1719   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1720   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1721   PetscInt    *ai = a->i,*ailen = a->ilen;
1722   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1723   MatScalar   *ap,*aa = a->a;
1724 
1725   PetscFunctionBegin;
1726   for (k=0; k<m; k++) { /* loop over rows */
1727     row = im[k]; brow = row/bs;
1728     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1729     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1730     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1731     nrow = ailen[brow];
1732     for (l=0; l<n; l++) { /* loop over columns */
1733       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1734       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1735       col  = in[l];
1736       bcol = col/bs;
1737       cidx = col%bs;
1738       ridx = row%bs;
1739       high = nrow;
1740       low  = 0; /* assume unsorted */
1741       while (high-low > 5) {
1742         t = (low+high)/2;
1743         if (rp[t] > bcol) high = t;
1744         else             low  = t;
1745       }
1746       for (i=low; i<high; i++) {
1747         if (rp[i] > bcol) break;
1748         if (rp[i] == bcol) {
1749           *v++ = ap[bs2*i+bs*cidx+ridx];
1750           goto finished;
1751         }
1752       }
1753       *v++ = 0.0;
1754 finished:;
1755     }
1756   }
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1761 {
1762   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1763   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1764   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1765   PetscErrorCode    ierr;
1766   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1767   PetscBool         roworiented=a->roworiented;
1768   const PetscScalar *value     = v;
1769   MatScalar         *ap=NULL,*aa = a->a,*bap;
1770 
1771   PetscFunctionBegin;
1772   if (roworiented) {
1773     stepval = (n-1)*bs;
1774   } else {
1775     stepval = (m-1)*bs;
1776   }
1777   for (k=0; k<m; k++) { /* loop over added rows */
1778     row = im[k];
1779     if (row < 0) continue;
1780 #if defined(PETSC_USE_DEBUG)
1781     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1782 #endif
1783     rp   = aj + ai[row];
1784     if (!A->structure_only) ap = aa + bs2*ai[row];
1785     rmax = imax[row];
1786     nrow = ailen[row];
1787     low  = 0;
1788     high = nrow;
1789     for (l=0; l<n; l++) { /* loop over added columns */
1790       if (in[l] < 0) continue;
1791 #if defined(PETSC_USE_DEBUG)
1792       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1793 #endif
1794       col = in[l];
1795       if (!A->structure_only) {
1796         if (roworiented) {
1797           value = v + (k*(stepval+bs) + l)*bs;
1798         } else {
1799           value = v + (l*(stepval+bs) + k)*bs;
1800         }
1801       }
1802       if (col <= lastcol) low = 0;
1803       else high = nrow;
1804       lastcol = col;
1805       while (high-low > 7) {
1806         t = (low+high)/2;
1807         if (rp[t] > col) high = t;
1808         else             low  = t;
1809       }
1810       for (i=low; i<high; i++) {
1811         if (rp[i] > col) break;
1812         if (rp[i] == col) {
1813           if (A->structure_only) goto noinsert2;
1814           bap = ap +  bs2*i;
1815           if (roworiented) {
1816             if (is == ADD_VALUES) {
1817               for (ii=0; ii<bs; ii++,value+=stepval) {
1818                 for (jj=ii; jj<bs2; jj+=bs) {
1819                   bap[jj] += *value++;
1820                 }
1821               }
1822             } else {
1823               for (ii=0; ii<bs; ii++,value+=stepval) {
1824                 for (jj=ii; jj<bs2; jj+=bs) {
1825                   bap[jj] = *value++;
1826                 }
1827               }
1828             }
1829           } else {
1830             if (is == ADD_VALUES) {
1831               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1832                 for (jj=0; jj<bs; jj++) {
1833                   bap[jj] += value[jj];
1834                 }
1835                 bap += bs;
1836               }
1837             } else {
1838               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1839                 for (jj=0; jj<bs; jj++) {
1840                   bap[jj]  = value[jj];
1841                 }
1842                 bap += bs;
1843               }
1844             }
1845           }
1846           goto noinsert2;
1847         }
1848       }
1849       if (nonew == 1) goto noinsert2;
1850       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1851       if (A->structure_only) {
1852         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1853       } else {
1854         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1855       }
1856       N = nrow++ - 1; high++;
1857       /* shift up all the later entries in this row */
1858       for (ii=N; ii>=i; ii--) {
1859         rp[ii+1] = rp[ii];
1860         if (!A->structure_only) {
1861           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1862         }
1863       }
1864       if (N >= i && !A->structure_only) {
1865         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1866       }
1867 
1868       rp[i] = col;
1869       if (!A->structure_only) {
1870         bap   = ap +  bs2*i;
1871         if (roworiented) {
1872           for (ii=0; ii<bs; ii++,value+=stepval) {
1873             for (jj=ii; jj<bs2; jj+=bs) {
1874               bap[jj] = *value++;
1875             }
1876           }
1877         } else {
1878           for (ii=0; ii<bs; ii++,value+=stepval) {
1879             for (jj=0; jj<bs; jj++) {
1880               *bap++ = *value++;
1881             }
1882           }
1883         }
1884       }
1885 noinsert2:;
1886       low = i;
1887     }
1888     ailen[row] = nrow;
1889   }
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1894 {
1895   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1896   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1897   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1898   PetscErrorCode ierr;
1899   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1900   MatScalar      *aa  = a->a,*ap;
1901   PetscReal      ratio=0.6;
1902 
1903   PetscFunctionBegin;
1904   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1905 
1906   if (m) rmax = ailen[0];
1907   for (i=1; i<mbs; i++) {
1908     /* move each row back by the amount of empty slots (fshift) before it*/
1909     fshift += imax[i-1] - ailen[i-1];
1910     rmax    = PetscMax(rmax,ailen[i]);
1911     if (fshift) {
1912       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1913       N  = ailen[i];
1914       for (j=0; j<N; j++) {
1915         ip[j-fshift] = ip[j];
1916         if (!A->structure_only) {
1917           ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1918         }
1919       }
1920     }
1921     ai[i] = ai[i-1] + ailen[i-1];
1922   }
1923   if (mbs) {
1924     fshift += imax[mbs-1] - ailen[mbs-1];
1925     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1926   }
1927 
1928   /* reset ilen and imax for each row */
1929   a->nonzerorowcnt = 0;
1930   if (A->structure_only) {
1931     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1932   } else { /* !A->structure_only */
1933     for (i=0; i<mbs; i++) {
1934       ailen[i] = imax[i] = ai[i+1] - ai[i];
1935       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1936     }
1937   }
1938   a->nz = ai[mbs];
1939 
1940   /* diagonals may have moved, so kill the diagonal pointers */
1941   a->idiagvalid = PETSC_FALSE;
1942   if (fshift && a->diag) {
1943     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1944     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1945     a->diag = 0;
1946   }
1947   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1948   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1949   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1950   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1951 
1952   A->info.mallocs    += a->reallocs;
1953   a->reallocs         = 0;
1954   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1955   a->rmax             = rmax;
1956 
1957   if (!A->structure_only) {
1958     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1959   }
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 /*
1964    This function returns an array of flags which indicate the locations of contiguous
1965    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1966    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1967    Assume: sizes should be long enough to hold all the values.
1968 */
1969 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1970 {
1971   PetscInt  i,j,k,row;
1972   PetscBool flg;
1973 
1974   PetscFunctionBegin;
1975   for (i=0,j=0; i<n; j++) {
1976     row = idx[i];
1977     if (row%bs!=0) { /* Not the begining of a block */
1978       sizes[j] = 1;
1979       i++;
1980     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1981       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1982       i++;
1983     } else { /* Begining of the block, so check if the complete block exists */
1984       flg = PETSC_TRUE;
1985       for (k=1; k<bs; k++) {
1986         if (row+k != idx[i+k]) { /* break in the block */
1987           flg = PETSC_FALSE;
1988           break;
1989         }
1990       }
1991       if (flg) { /* No break in the bs */
1992         sizes[j] = bs;
1993         i       += bs;
1994       } else {
1995         sizes[j] = 1;
1996         i++;
1997       }
1998     }
1999   }
2000   *bs_max = j;
2001   PetscFunctionReturn(0);
2002 }
2003 
2004 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2005 {
2006   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2007   PetscErrorCode    ierr;
2008   PetscInt          i,j,k,count,*rows;
2009   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2010   PetscScalar       zero = 0.0;
2011   MatScalar         *aa;
2012   const PetscScalar *xx;
2013   PetscScalar       *bb;
2014 
2015   PetscFunctionBegin;
2016   /* fix right hand side if needed */
2017   if (x && b) {
2018     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2019     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2020     for (i=0; i<is_n; i++) {
2021       bb[is_idx[i]] = diag*xx[is_idx[i]];
2022     }
2023     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2024     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2025   }
2026 
2027   /* Make a copy of the IS and  sort it */
2028   /* allocate memory for rows,sizes */
2029   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2030 
2031   /* copy IS values to rows, and sort them */
2032   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2033   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2034 
2035   if (baij->keepnonzeropattern) {
2036     for (i=0; i<is_n; i++) sizes[i] = 1;
2037     bs_max          = is_n;
2038   } else {
2039     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2040     A->nonzerostate++;
2041   }
2042 
2043   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2044     row = rows[j];
2045     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2046     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2047     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2048     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2049       if (diag != (PetscScalar)0.0) {
2050         if (baij->ilen[row/bs] > 0) {
2051           baij->ilen[row/bs]       = 1;
2052           baij->j[baij->i[row/bs]] = row/bs;
2053 
2054           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2055         }
2056         /* Now insert all the diagonal values for this bs */
2057         for (k=0; k<bs; k++) {
2058           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2059         }
2060       } else { /* (diag == 0.0) */
2061         baij->ilen[row/bs] = 0;
2062       } /* end (diag == 0.0) */
2063     } else { /* (sizes[i] != bs) */
2064 #if defined(PETSC_USE_DEBUG)
2065       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2066 #endif
2067       for (k=0; k<count; k++) {
2068         aa[0] =  zero;
2069         aa   += bs;
2070       }
2071       if (diag != (PetscScalar)0.0) {
2072         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2073       }
2074     }
2075   }
2076 
2077   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2078   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   PetscFunctionReturn(0);
2080 }
2081 
2082 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2083 {
2084   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2085   PetscErrorCode    ierr;
2086   PetscInt          i,j,k,count;
2087   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2088   PetscScalar       zero = 0.0;
2089   MatScalar         *aa;
2090   const PetscScalar *xx;
2091   PetscScalar       *bb;
2092   PetscBool         *zeroed,vecs = PETSC_FALSE;
2093 
2094   PetscFunctionBegin;
2095   /* fix right hand side if needed */
2096   if (x && b) {
2097     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2098     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2099     vecs = PETSC_TRUE;
2100   }
2101 
2102   /* zero the columns */
2103   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2104   for (i=0; i<is_n; i++) {
2105     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2106     zeroed[is_idx[i]] = PETSC_TRUE;
2107   }
2108   for (i=0; i<A->rmap->N; i++) {
2109     if (!zeroed[i]) {
2110       row = i/bs;
2111       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2112         for (k=0; k<bs; k++) {
2113           col = bs*baij->j[j] + k;
2114           if (zeroed[col]) {
2115             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2116             if (vecs) bb[i] -= aa[0]*xx[col];
2117             aa[0] = 0.0;
2118           }
2119         }
2120       }
2121     } else if (vecs) bb[i] = diag*xx[i];
2122   }
2123   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2124   if (vecs) {
2125     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2126     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2127   }
2128 
2129   /* zero the rows */
2130   for (i=0; i<is_n; i++) {
2131     row   = is_idx[i];
2132     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2133     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2134     for (k=0; k<count; k++) {
2135       aa[0] =  zero;
2136       aa   += bs;
2137     }
2138     if (diag != (PetscScalar)0.0) {
2139       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2140     }
2141   }
2142   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2147 {
2148   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2149   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2150   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2151   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2152   PetscErrorCode ierr;
2153   PetscInt       ridx,cidx,bs2=a->bs2;
2154   PetscBool      roworiented=a->roworiented;
2155   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2156 
2157   PetscFunctionBegin;
2158   for (k=0; k<m; k++) { /* loop over added rows */
2159     row  = im[k];
2160     brow = row/bs;
2161     if (row < 0) continue;
2162 #if defined(PETSC_USE_DEBUG)
2163     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2164 #endif
2165     rp   = aj + ai[brow];
2166     if (!A->structure_only) ap = aa + bs2*ai[brow];
2167     rmax = imax[brow];
2168     nrow = ailen[brow];
2169     low  = 0;
2170     high = nrow;
2171     for (l=0; l<n; l++) { /* loop over added columns */
2172       if (in[l] < 0) continue;
2173 #if defined(PETSC_USE_DEBUG)
2174       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2175 #endif
2176       col  = in[l]; bcol = col/bs;
2177       ridx = row % bs; cidx = col % bs;
2178       if (!A->structure_only) {
2179         if (roworiented) {
2180           value = v[l + k*n];
2181         } else {
2182           value = v[k + l*m];
2183         }
2184       }
2185       if (col <= lastcol) low = 0; else high = nrow;
2186       lastcol = col;
2187       while (high-low > 7) {
2188         t = (low+high)/2;
2189         if (rp[t] > bcol) high = t;
2190         else              low  = t;
2191       }
2192       for (i=low; i<high; i++) {
2193         if (rp[i] > bcol) break;
2194         if (rp[i] == bcol) {
2195           bap = ap +  bs2*i + bs*cidx + ridx;
2196           if (!A->structure_only) {
2197             if (is == ADD_VALUES) *bap += value;
2198             else                  *bap  = value;
2199           }
2200           goto noinsert1;
2201         }
2202       }
2203       if (nonew == 1) goto noinsert1;
2204       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2205       if (A->structure_only) {
2206         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2207       } else {
2208         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2209       }
2210       N = nrow++ - 1; high++;
2211       /* shift up all the later entries in this row */
2212       for (ii=N; ii>=i; ii--) {
2213         rp[ii+1] = rp[ii];
2214         if (!A->structure_only) {
2215           ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2216         }
2217       }
2218       if (N>=i && !A->structure_only) {
2219         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2220       }
2221       rp[i]                      = bcol;
2222       if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value;
2223       a->nz++;
2224       A->nonzerostate++;
2225 noinsert1:;
2226       low = i;
2227     }
2228     ailen[brow] = nrow;
2229   }
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2234 {
2235   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2236   Mat            outA;
2237   PetscErrorCode ierr;
2238   PetscBool      row_identity,col_identity;
2239 
2240   PetscFunctionBegin;
2241   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2242   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2243   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2244   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2245 
2246   outA            = inA;
2247   inA->factortype = MAT_FACTOR_LU;
2248   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2249   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2250 
2251   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2252 
2253   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2254   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2255   a->row = row;
2256   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2257   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2258   a->col = col;
2259 
2260   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2261   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2262   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2263   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2264 
2265   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2266   if (!a->solve_work) {
2267     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2268     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2269   }
2270   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2271   PetscFunctionReturn(0);
2272 }
2273 
2274 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2275 {
2276   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2277   PetscInt    i,nz,mbs;
2278 
2279   PetscFunctionBegin;
2280   nz  = baij->maxnz;
2281   mbs = baij->mbs;
2282   for (i=0; i<nz; i++) {
2283     baij->j[i] = indices[i];
2284   }
2285   baij->nz = nz;
2286   for (i=0; i<mbs; i++) {
2287     baij->ilen[i] = baij->imax[i];
2288   }
2289   PetscFunctionReturn(0);
2290 }
2291 
2292 /*@
2293     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2294        in the matrix.
2295 
2296   Input Parameters:
2297 +  mat - the SeqBAIJ matrix
2298 -  indices - the column indices
2299 
2300   Level: advanced
2301 
2302   Notes:
2303     This can be called if you have precomputed the nonzero structure of the
2304   matrix and want to provide it to the matrix object to improve the performance
2305   of the MatSetValues() operation.
2306 
2307     You MUST have set the correct numbers of nonzeros per row in the call to
2308   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2309 
2310     MUST be called before any calls to MatSetValues();
2311 
2312 @*/
2313 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2314 {
2315   PetscErrorCode ierr;
2316 
2317   PetscFunctionBegin;
2318   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2319   PetscValidPointer(indices,2);
2320   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2321   PetscFunctionReturn(0);
2322 }
2323 
2324 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2325 {
2326   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2327   PetscErrorCode ierr;
2328   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2329   PetscReal      atmp;
2330   PetscScalar    *x,zero = 0.0;
2331   MatScalar      *aa;
2332   PetscInt       ncols,brow,krow,kcol;
2333 
2334   PetscFunctionBegin;
2335   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2336   bs  = A->rmap->bs;
2337   aa  = a->a;
2338   ai  = a->i;
2339   aj  = a->j;
2340   mbs = a->mbs;
2341 
2342   ierr = VecSet(v,zero);CHKERRQ(ierr);
2343   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2344   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2345   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2346   for (i=0; i<mbs; i++) {
2347     ncols = ai[1] - ai[0]; ai++;
2348     brow  = bs*i;
2349     for (j=0; j<ncols; j++) {
2350       for (kcol=0; kcol<bs; kcol++) {
2351         for (krow=0; krow<bs; krow++) {
2352           atmp = PetscAbsScalar(*aa);aa++;
2353           row  = brow + krow;   /* row index */
2354           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2355         }
2356       }
2357       aj++;
2358     }
2359   }
2360   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2361   PetscFunctionReturn(0);
2362 }
2363 
2364 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2365 {
2366   PetscErrorCode ierr;
2367 
2368   PetscFunctionBegin;
2369   /* If the two matrices have the same copy implementation, use fast copy. */
2370   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2371     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2372     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2373     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2374 
2375     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2376     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2377     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2378     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2379   } else {
2380     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2381   }
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2386 {
2387   PetscErrorCode ierr;
2388 
2389   PetscFunctionBegin;
2390   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2391   PetscFunctionReturn(0);
2392 }
2393 
2394 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2395 {
2396   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2397 
2398   PetscFunctionBegin;
2399   *array = a->a;
2400   PetscFunctionReturn(0);
2401 }
2402 
2403 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2404 {
2405   PetscFunctionBegin;
2406   PetscFunctionReturn(0);
2407 }
2408 
2409 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2410 {
2411   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2412   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2413   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2414   PetscErrorCode ierr;
2415 
2416   PetscFunctionBegin;
2417   /* Set the number of nonzeros in the new matrix */
2418   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2419   PetscFunctionReturn(0);
2420 }
2421 
2422 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2423 {
2424   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2425   PetscErrorCode ierr;
2426   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2427   PetscBLASInt   one=1;
2428 
2429   PetscFunctionBegin;
2430   if (str == SAME_NONZERO_PATTERN) {
2431     PetscScalar  alpha = a;
2432     PetscBLASInt bnz;
2433     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2434     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2435     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2436   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2437     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2438   } else {
2439     Mat      B;
2440     PetscInt *nnz;
2441     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2442     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2443     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2444     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2445     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2446     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2447     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2448     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2449     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2450     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2451     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2452     ierr = PetscFree(nnz);CHKERRQ(ierr);
2453   }
2454   PetscFunctionReturn(0);
2455 }
2456 
2457 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2458 {
2459   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2460   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2461   MatScalar   *aa = a->a;
2462 
2463   PetscFunctionBegin;
2464   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2465   PetscFunctionReturn(0);
2466 }
2467 
2468 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2469 {
2470   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2471   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2472   MatScalar   *aa = a->a;
2473 
2474   PetscFunctionBegin;
2475   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2476   PetscFunctionReturn(0);
2477 }
2478 
2479 /*
2480     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2481 */
2482 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2483 {
2484   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2485   PetscErrorCode ierr;
2486   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2487   PetscInt       nz = a->i[m],row,*jj,mr,col;
2488 
2489   PetscFunctionBegin;
2490   *nn = n;
2491   if (!ia) PetscFunctionReturn(0);
2492   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2493   else {
2494     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2495     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2496     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2497     jj   = a->j;
2498     for (i=0; i<nz; i++) {
2499       collengths[jj[i]]++;
2500     }
2501     cia[0] = oshift;
2502     for (i=0; i<n; i++) {
2503       cia[i+1] = cia[i] + collengths[i];
2504     }
2505     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2506     jj   = a->j;
2507     for (row=0; row<m; row++) {
2508       mr = a->i[row+1] - a->i[row];
2509       for (i=0; i<mr; i++) {
2510         col = *jj++;
2511 
2512         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2513       }
2514     }
2515     ierr = PetscFree(collengths);CHKERRQ(ierr);
2516     *ia  = cia; *ja = cja;
2517   }
2518   PetscFunctionReturn(0);
2519 }
2520 
2521 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2522 {
2523   PetscErrorCode ierr;
2524 
2525   PetscFunctionBegin;
2526   if (!ia) PetscFunctionReturn(0);
2527   ierr = PetscFree(*ia);CHKERRQ(ierr);
2528   ierr = PetscFree(*ja);CHKERRQ(ierr);
2529   PetscFunctionReturn(0);
2530 }
2531 
2532 /*
2533  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2534  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2535  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2536  */
2537 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2538 {
2539   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2540   PetscErrorCode ierr;
2541   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2542   PetscInt       nz = a->i[m],row,*jj,mr,col;
2543   PetscInt       *cspidx;
2544 
2545   PetscFunctionBegin;
2546   *nn = n;
2547   if (!ia) PetscFunctionReturn(0);
2548 
2549   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2550   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2551   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2552   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2553   jj   = a->j;
2554   for (i=0; i<nz; i++) {
2555     collengths[jj[i]]++;
2556   }
2557   cia[0] = oshift;
2558   for (i=0; i<n; i++) {
2559     cia[i+1] = cia[i] + collengths[i];
2560   }
2561   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2562   jj   = a->j;
2563   for (row=0; row<m; row++) {
2564     mr = a->i[row+1] - a->i[row];
2565     for (i=0; i<mr; i++) {
2566       col = *jj++;
2567       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2568       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2569     }
2570   }
2571   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2572   *ia    = cia; *ja = cja;
2573   *spidx = cspidx;
2574   PetscFunctionReturn(0);
2575 }
2576 
2577 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2578 {
2579   PetscErrorCode ierr;
2580 
2581   PetscFunctionBegin;
2582   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2583   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2584   PetscFunctionReturn(0);
2585 }
2586 
2587 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2588 {
2589   PetscErrorCode ierr;
2590   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2591 
2592   PetscFunctionBegin;
2593   if (!Y->preallocated || !aij->nz) {
2594     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2595   }
2596   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2597   PetscFunctionReturn(0);
2598 }
2599 
2600 /* -------------------------------------------------------------------*/
2601 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2602                                        MatGetRow_SeqBAIJ,
2603                                        MatRestoreRow_SeqBAIJ,
2604                                        MatMult_SeqBAIJ_N,
2605                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2606                                        MatMultTranspose_SeqBAIJ,
2607                                        MatMultTransposeAdd_SeqBAIJ,
2608                                        0,
2609                                        0,
2610                                        0,
2611                                /* 10*/ 0,
2612                                        MatLUFactor_SeqBAIJ,
2613                                        0,
2614                                        0,
2615                                        MatTranspose_SeqBAIJ,
2616                                /* 15*/ MatGetInfo_SeqBAIJ,
2617                                        MatEqual_SeqBAIJ,
2618                                        MatGetDiagonal_SeqBAIJ,
2619                                        MatDiagonalScale_SeqBAIJ,
2620                                        MatNorm_SeqBAIJ,
2621                                /* 20*/ 0,
2622                                        MatAssemblyEnd_SeqBAIJ,
2623                                        MatSetOption_SeqBAIJ,
2624                                        MatZeroEntries_SeqBAIJ,
2625                                /* 24*/ MatZeroRows_SeqBAIJ,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                        0,
2630                                /* 29*/ MatSetUp_SeqBAIJ,
2631                                        0,
2632                                        0,
2633                                        0,
2634                                        0,
2635                                /* 34*/ MatDuplicate_SeqBAIJ,
2636                                        0,
2637                                        0,
2638                                        MatILUFactor_SeqBAIJ,
2639                                        0,
2640                                /* 39*/ MatAXPY_SeqBAIJ,
2641                                        MatCreateSubMatrices_SeqBAIJ,
2642                                        MatIncreaseOverlap_SeqBAIJ,
2643                                        MatGetValues_SeqBAIJ,
2644                                        MatCopy_SeqBAIJ,
2645                                /* 44*/ 0,
2646                                        MatScale_SeqBAIJ,
2647                                        MatShift_SeqBAIJ,
2648                                        0,
2649                                        MatZeroRowsColumns_SeqBAIJ,
2650                                /* 49*/ 0,
2651                                        MatGetRowIJ_SeqBAIJ,
2652                                        MatRestoreRowIJ_SeqBAIJ,
2653                                        MatGetColumnIJ_SeqBAIJ,
2654                                        MatRestoreColumnIJ_SeqBAIJ,
2655                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                        MatSetValuesBlocked_SeqBAIJ,
2660                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2661                                        MatDestroy_SeqBAIJ,
2662                                        MatView_SeqBAIJ,
2663                                        0,
2664                                        0,
2665                                /* 64*/ 0,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2671                                        0,
2672                                        MatConvert_Basic,
2673                                        0,
2674                                        0,
2675                                /* 74*/ 0,
2676                                        MatFDColoringApply_BAIJ,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                /* 79*/ 0,
2681                                        0,
2682                                        0,
2683                                        0,
2684                                        MatLoad_SeqBAIJ,
2685                                /* 84*/ 0,
2686                                        0,
2687                                        0,
2688                                        0,
2689                                        0,
2690                                /* 89*/ 0,
2691                                        0,
2692                                        0,
2693                                        0,
2694                                        0,
2695                                /* 94*/ 0,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                /* 99*/ 0,
2701                                        0,
2702                                        0,
2703                                        0,
2704                                        0,
2705                                /*104*/ 0,
2706                                        MatRealPart_SeqBAIJ,
2707                                        MatImaginaryPart_SeqBAIJ,
2708                                        0,
2709                                        0,
2710                                /*109*/ 0,
2711                                        0,
2712                                        0,
2713                                        0,
2714                                        MatMissingDiagonal_SeqBAIJ,
2715                                /*114*/ 0,
2716                                        0,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                /*119*/ 0,
2721                                        0,
2722                                        MatMultHermitianTranspose_SeqBAIJ,
2723                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2724                                        0,
2725                                /*124*/ 0,
2726                                        0,
2727                                        MatInvertBlockDiagonal_SeqBAIJ,
2728                                        0,
2729                                        0,
2730                                /*129*/ 0,
2731                                        0,
2732                                        0,
2733                                        0,
2734                                        0,
2735                                /*134*/ 0,
2736                                        0,
2737                                        0,
2738                                        0,
2739                                        0,
2740                                /*139*/ MatSetBlockSizes_Default,
2741                                        0,
2742                                        0,
2743                                        MatFDColoringSetUp_SeqXAIJ,
2744                                        0,
2745                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2746                                        MatDestroySubMatrices_SeqBAIJ
2747 };
2748 
2749 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2750 {
2751   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2752   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2753   PetscErrorCode ierr;
2754 
2755   PetscFunctionBegin;
2756   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2757 
2758   /* allocate space for values if not already there */
2759   if (!aij->saved_values) {
2760     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2761     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2762   }
2763 
2764   /* copy values over */
2765   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2766   PetscFunctionReturn(0);
2767 }
2768 
2769 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2770 {
2771   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2772   PetscErrorCode ierr;
2773   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2774 
2775   PetscFunctionBegin;
2776   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2777   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2778 
2779   /* copy values over */
2780   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2781   PetscFunctionReturn(0);
2782 }
2783 
2784 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2785 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2786 
2787 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2788 {
2789   Mat_SeqBAIJ    *b;
2790   PetscErrorCode ierr;
2791   PetscInt       i,mbs,nbs,bs2;
2792   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2793 
2794   PetscFunctionBegin;
2795   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2796   if (nz == MAT_SKIP_ALLOCATION) {
2797     skipallocation = PETSC_TRUE;
2798     nz             = 0;
2799   }
2800 
2801   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2802   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2803   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2804   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2805 
2806   B->preallocated = PETSC_TRUE;
2807 
2808   mbs = B->rmap->n/bs;
2809   nbs = B->cmap->n/bs;
2810   bs2 = bs*bs;
2811 
2812   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2813 
2814   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2815   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2816   if (nnz) {
2817     for (i=0; i<mbs; i++) {
2818       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2819       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2820     }
2821   }
2822 
2823   b    = (Mat_SeqBAIJ*)B->data;
2824   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2825   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2826   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2827 
2828   if (!flg) {
2829     switch (bs) {
2830     case 1:
2831       B->ops->mult    = MatMult_SeqBAIJ_1;
2832       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2833       break;
2834     case 2:
2835       B->ops->mult    = MatMult_SeqBAIJ_2;
2836       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2837       break;
2838     case 3:
2839       B->ops->mult    = MatMult_SeqBAIJ_3;
2840       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2841       break;
2842     case 4:
2843       B->ops->mult    = MatMult_SeqBAIJ_4;
2844       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2845       break;
2846     case 5:
2847       B->ops->mult    = MatMult_SeqBAIJ_5;
2848       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2849       break;
2850     case 6:
2851       B->ops->mult    = MatMult_SeqBAIJ_6;
2852       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2853       break;
2854     case 7:
2855       B->ops->mult    = MatMult_SeqBAIJ_7;
2856       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2857       break;
2858     case 9:
2859 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2860       B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2861       B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2862 #else
2863       B->ops->mult    = MatMult_SeqBAIJ_N;
2864       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2865 #endif
2866       break;
2867     case 11:
2868       B->ops->mult    = MatMult_SeqBAIJ_11;
2869       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2870       break;
2871     case 15:
2872       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2873       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2874       break;
2875     default:
2876       B->ops->mult    = MatMult_SeqBAIJ_N;
2877       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2878       break;
2879     }
2880   }
2881   B->ops->sor = MatSOR_SeqBAIJ;
2882   b->mbs = mbs;
2883   b->nbs = nbs;
2884   if (!skipallocation) {
2885     if (!b->imax) {
2886       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2887       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2888 
2889       b->free_imax_ilen = PETSC_TRUE;
2890     }
2891     /* b->ilen will count nonzeros in each block row so far. */
2892     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2893     if (!nnz) {
2894       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2895       else if (nz < 0) nz = 1;
2896       for (i=0; i<mbs; i++) b->imax[i] = nz;
2897       nz = nz*mbs;
2898     } else {
2899       nz = 0;
2900       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2901     }
2902 
2903     /* allocate the matrix space */
2904     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2905     if (B->structure_only) {
2906       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2907       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2908       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2909     } else {
2910       ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2911       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2912       ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2913     }
2914     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2915 
2916     if (B->structure_only) {
2917       b->singlemalloc = PETSC_FALSE;
2918       b->free_a       = PETSC_FALSE;
2919     } else {
2920       b->singlemalloc = PETSC_TRUE;
2921       b->free_a       = PETSC_TRUE;
2922     }
2923     b->free_ij = PETSC_TRUE;
2924 
2925     b->i[0] = 0;
2926     for (i=1; i<mbs+1; i++) {
2927       b->i[i] = b->i[i-1] + b->imax[i-1];
2928     }
2929 
2930   } else {
2931     b->free_a  = PETSC_FALSE;
2932     b->free_ij = PETSC_FALSE;
2933   }
2934 
2935   b->bs2              = bs2;
2936   b->mbs              = mbs;
2937   b->nz               = 0;
2938   b->maxnz            = nz;
2939   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2940   B->was_assembled    = PETSC_FALSE;
2941   B->assembled        = PETSC_FALSE;
2942   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2943   PetscFunctionReturn(0);
2944 }
2945 
2946 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2947 {
2948   PetscInt       i,m,nz,nz_max=0,*nnz;
2949   PetscScalar    *values=0;
2950   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2951   PetscErrorCode ierr;
2952 
2953   PetscFunctionBegin;
2954   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2955   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2956   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2957   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2958   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2959   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2960   m    = B->rmap->n/bs;
2961 
2962   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2963   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2964   for (i=0; i<m; i++) {
2965     nz = ii[i+1]- ii[i];
2966     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2967     nz_max = PetscMax(nz_max, nz);
2968     nnz[i] = nz;
2969   }
2970   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2971   ierr = PetscFree(nnz);CHKERRQ(ierr);
2972 
2973   values = (PetscScalar*)V;
2974   if (!values) {
2975     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2976   }
2977   for (i=0; i<m; i++) {
2978     PetscInt          ncols  = ii[i+1] - ii[i];
2979     const PetscInt    *icols = jj + ii[i];
2980     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2981     if (!roworiented) {
2982       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2983     } else {
2984       PetscInt j;
2985       for (j=0; j<ncols; j++) {
2986         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2987         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2988       }
2989     }
2990   }
2991   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2992   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2993   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2994   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2995   PetscFunctionReturn(0);
2996 }
2997 
2998 /*MC
2999    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3000    block sparse compressed row format.
3001 
3002    Options Database Keys:
3003 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3004 
3005   Level: beginner
3006 
3007 .seealso: MatCreateSeqBAIJ()
3008 M*/
3009 
3010 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3011 
3012 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3013 {
3014   PetscErrorCode ierr;
3015   PetscMPIInt    size;
3016   Mat_SeqBAIJ    *b;
3017 
3018   PetscFunctionBegin;
3019   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3020   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3021 
3022   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3023   B->data = (void*)b;
3024   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3025 
3026   b->row          = 0;
3027   b->col          = 0;
3028   b->icol         = 0;
3029   b->reallocs     = 0;
3030   b->saved_values = 0;
3031 
3032   b->roworiented        = PETSC_TRUE;
3033   b->nonew              = 0;
3034   b->diag               = 0;
3035   B->spptr              = 0;
3036   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3037   b->keepnonzeropattern = PETSC_FALSE;
3038 
3039   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3048 #if defined(PETSC_HAVE_HYPRE)
3049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3050 #endif
3051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
3053   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3054   PetscFunctionReturn(0);
3055 }
3056 
3057 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3058 {
3059   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3060   PetscErrorCode ierr;
3061   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3062 
3063   PetscFunctionBegin;
3064   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3065 
3066   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3067     c->imax           = a->imax;
3068     c->ilen           = a->ilen;
3069     c->free_imax_ilen = PETSC_FALSE;
3070   } else {
3071     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3072     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3073     for (i=0; i<mbs; i++) {
3074       c->imax[i] = a->imax[i];
3075       c->ilen[i] = a->ilen[i];
3076     }
3077     c->free_imax_ilen = PETSC_TRUE;
3078   }
3079 
3080   /* allocate the matrix space */
3081   if (mallocmatspace) {
3082     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3083       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3084       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3085 
3086       c->i            = a->i;
3087       c->j            = a->j;
3088       c->singlemalloc = PETSC_FALSE;
3089       c->free_a       = PETSC_TRUE;
3090       c->free_ij      = PETSC_FALSE;
3091       c->parent       = A;
3092       C->preallocated = PETSC_TRUE;
3093       C->assembled    = PETSC_TRUE;
3094 
3095       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3096       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3097       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3098     } else {
3099       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3100       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3101 
3102       c->singlemalloc = PETSC_TRUE;
3103       c->free_a       = PETSC_TRUE;
3104       c->free_ij      = PETSC_TRUE;
3105 
3106       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3107       if (mbs > 0) {
3108         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3109         if (cpvalues == MAT_COPY_VALUES) {
3110           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3111         } else {
3112           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3113         }
3114       }
3115       C->preallocated = PETSC_TRUE;
3116       C->assembled    = PETSC_TRUE;
3117     }
3118   }
3119 
3120   c->roworiented = a->roworiented;
3121   c->nonew       = a->nonew;
3122 
3123   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3124   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3125 
3126   c->bs2         = a->bs2;
3127   c->mbs         = a->mbs;
3128   c->nbs         = a->nbs;
3129 
3130   if (a->diag) {
3131     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3132       c->diag      = a->diag;
3133       c->free_diag = PETSC_FALSE;
3134     } else {
3135       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3136       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3137       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3138       c->free_diag = PETSC_TRUE;
3139     }
3140   } else c->diag = 0;
3141 
3142   c->nz         = a->nz;
3143   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3144   c->solve_work = NULL;
3145   c->mult_work  = NULL;
3146   c->sor_workt  = NULL;
3147   c->sor_work   = NULL;
3148 
3149   c->compressedrow.use   = a->compressedrow.use;
3150   c->compressedrow.nrows = a->compressedrow.nrows;
3151   if (a->compressedrow.use) {
3152     i    = a->compressedrow.nrows;
3153     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3154     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3155     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3156     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3157   } else {
3158     c->compressedrow.use    = PETSC_FALSE;
3159     c->compressedrow.i      = NULL;
3160     c->compressedrow.rindex = NULL;
3161   }
3162   C->nonzerostate = A->nonzerostate;
3163 
3164   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3165   PetscFunctionReturn(0);
3166 }
3167 
3168 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3169 {
3170   PetscErrorCode ierr;
3171 
3172   PetscFunctionBegin;
3173   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3174   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3175   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3176   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3177   PetscFunctionReturn(0);
3178 }
3179 
3180 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3181 {
3182   Mat_SeqBAIJ    *a;
3183   PetscErrorCode ierr;
3184   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3185   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3186   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3187   PetscInt       *masked,nmask,tmp,bs2,ishift;
3188   PetscMPIInt    size;
3189   int            fd;
3190   PetscScalar    *aa;
3191   MPI_Comm       comm;
3192 
3193   PetscFunctionBegin;
3194   /* force binary viewer to load .info file if it has not yet done so */
3195   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3196   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3197   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3198   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3199   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3200   if (bs < 0) bs = 1;
3201   bs2  = bs*bs;
3202 
3203   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3204   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3205   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3206   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3207   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3208   M = header[1]; N = header[2]; nz = header[3];
3209 
3210   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3211   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3212 
3213   /*
3214      This code adds extra rows to make sure the number of rows is
3215     divisible by the blocksize
3216   */
3217   mbs        = M/bs;
3218   extra_rows = bs - M + bs*(mbs);
3219   if (extra_rows == bs) extra_rows = 0;
3220   else mbs++;
3221   if (extra_rows) {
3222     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3223   }
3224 
3225   /* Set global sizes if not already set */
3226   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3227     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3228   } else { /* Check if the matrix global sizes are correct */
3229     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3230     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3231       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3232     }
3233     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3234   }
3235 
3236   /* read in row lengths */
3237   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3238   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3239   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3240 
3241   /* read in column indices */
3242   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3243   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3244   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3245 
3246   /* loop over row lengths determining block row lengths */
3247   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3248   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3249   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3250   rowcount = 0;
3251   nzcount  = 0;
3252   for (i=0; i<mbs; i++) {
3253     nmask = 0;
3254     for (j=0; j<bs; j++) {
3255       kmax = rowlengths[rowcount];
3256       for (k=0; k<kmax; k++) {
3257         tmp = jj[nzcount++]/bs;
3258         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3259       }
3260       rowcount++;
3261     }
3262     browlengths[i] += nmask;
3263     /* zero out the mask elements we set */
3264     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3265   }
3266 
3267   /* Do preallocation  */
3268   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3269   a    = (Mat_SeqBAIJ*)newmat->data;
3270 
3271   /* set matrix "i" values */
3272   a->i[0] = 0;
3273   for (i=1; i<= mbs; i++) {
3274     a->i[i]      = a->i[i-1] + browlengths[i-1];
3275     a->ilen[i-1] = browlengths[i-1];
3276   }
3277   a->nz = 0;
3278   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3279 
3280   /* read in nonzero values */
3281   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3282   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3283   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3284 
3285   /* set "a" and "j" values into matrix */
3286   nzcount = 0; jcount = 0;
3287   for (i=0; i<mbs; i++) {
3288     nzcountb = nzcount;
3289     nmask    = 0;
3290     for (j=0; j<bs; j++) {
3291       kmax = rowlengths[i*bs+j];
3292       for (k=0; k<kmax; k++) {
3293         tmp = jj[nzcount++]/bs;
3294         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3295       }
3296     }
3297     /* sort the masked values */
3298     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3299 
3300     /* set "j" values into matrix */
3301     maskcount = 1;
3302     for (j=0; j<nmask; j++) {
3303       a->j[jcount++]  = masked[j];
3304       mask[masked[j]] = maskcount++;
3305     }
3306     /* set "a" values into matrix */
3307     ishift = bs2*a->i[i];
3308     for (j=0; j<bs; j++) {
3309       kmax = rowlengths[i*bs+j];
3310       for (k=0; k<kmax; k++) {
3311         tmp       = jj[nzcountb]/bs;
3312         block     = mask[tmp] - 1;
3313         point     = jj[nzcountb] - bs*tmp;
3314         idx       = ishift + bs2*block + j + bs*point;
3315         a->a[idx] = (MatScalar)aa[nzcountb++];
3316       }
3317     }
3318     /* zero out the mask elements we set */
3319     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3320   }
3321   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3322 
3323   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3324   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3325   ierr = PetscFree(aa);CHKERRQ(ierr);
3326   ierr = PetscFree(jj);CHKERRQ(ierr);
3327   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3328 
3329   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3330   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3331   PetscFunctionReturn(0);
3332 }
3333 
3334 /*@C
3335    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3336    compressed row) format.  For good matrix assembly performance the
3337    user should preallocate the matrix storage by setting the parameter nz
3338    (or the array nnz).  By setting these parameters accurately, performance
3339    during matrix assembly can be increased by more than a factor of 50.
3340 
3341    Collective on MPI_Comm
3342 
3343    Input Parameters:
3344 +  comm - MPI communicator, set to PETSC_COMM_SELF
3345 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3346           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3347 .  m - number of rows
3348 .  n - number of columns
3349 .  nz - number of nonzero blocks  per block row (same for all rows)
3350 -  nnz - array containing the number of nonzero blocks in the various block rows
3351          (possibly different for each block row) or NULL
3352 
3353    Output Parameter:
3354 .  A - the matrix
3355 
3356    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3357    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3358    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3359 
3360    Options Database Keys:
3361 .   -mat_no_unroll - uses code that does not unroll the loops in the
3362                      block calculations (much slower)
3363 .    -mat_block_size - size of the blocks to use
3364 
3365    Level: intermediate
3366 
3367    Notes:
3368    The number of rows and columns must be divisible by blocksize.
3369 
3370    If the nnz parameter is given then the nz parameter is ignored
3371 
3372    A nonzero block is any block that as 1 or more nonzeros in it
3373 
3374    The block AIJ format is fully compatible with standard Fortran 77
3375    storage.  That is, the stored row and column indices can begin at
3376    either one (as in Fortran) or zero.  See the users' manual for details.
3377 
3378    Specify the preallocated storage with either nz or nnz (not both).
3379    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3380    allocation.  See Users-Manual: ch_mat for details.
3381    matrices.
3382 
3383 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3384 @*/
3385 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3386 {
3387   PetscErrorCode ierr;
3388 
3389   PetscFunctionBegin;
3390   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3391   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3392   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3393   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3394   PetscFunctionReturn(0);
3395 }
3396 
3397 /*@C
3398    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3399    per row in the matrix. For good matrix assembly performance the
3400    user should preallocate the matrix storage by setting the parameter nz
3401    (or the array nnz).  By setting these parameters accurately, performance
3402    during matrix assembly can be increased by more than a factor of 50.
3403 
3404    Collective on MPI_Comm
3405 
3406    Input Parameters:
3407 +  B - the matrix
3408 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3409           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3410 .  nz - number of block nonzeros per block row (same for all rows)
3411 -  nnz - array containing the number of block nonzeros in the various block rows
3412          (possibly different for each block row) or NULL
3413 
3414    Options Database Keys:
3415 .   -mat_no_unroll - uses code that does not unroll the loops in the
3416                      block calculations (much slower)
3417 .   -mat_block_size - size of the blocks to use
3418 
3419    Level: intermediate
3420 
3421    Notes:
3422    If the nnz parameter is given then the nz parameter is ignored
3423 
3424    You can call MatGetInfo() to get information on how effective the preallocation was;
3425    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3426    You can also run with the option -info and look for messages with the string
3427    malloc in them to see if additional memory allocation was needed.
3428 
3429    The block AIJ format is fully compatible with standard Fortran 77
3430    storage.  That is, the stored row and column indices can begin at
3431    either one (as in Fortran) or zero.  See the users' manual for details.
3432 
3433    Specify the preallocated storage with either nz or nnz (not both).
3434    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3435    allocation.  See Users-Manual: ch_mat for details.
3436 
3437 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3438 @*/
3439 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3440 {
3441   PetscErrorCode ierr;
3442 
3443   PetscFunctionBegin;
3444   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3445   PetscValidType(B,1);
3446   PetscValidLogicalCollectiveInt(B,bs,2);
3447   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3448   PetscFunctionReturn(0);
3449 }
3450 
3451 /*@C
3452    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3453    (the default sequential PETSc format).
3454 
3455    Collective on MPI_Comm
3456 
3457    Input Parameters:
3458 +  B - the matrix
3459 .  i - the indices into j for the start of each local row (starts with zero)
3460 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3461 -  v - optional values in the matrix
3462 
3463    Level: developer
3464 
3465    Notes:
3466    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3467    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3468    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3469    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3470    block column and the second index is over columns within a block.
3471 
3472 .keywords: matrix, aij, compressed row, sparse
3473 
3474 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3475 @*/
3476 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3477 {
3478   PetscErrorCode ierr;
3479 
3480   PetscFunctionBegin;
3481   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3482   PetscValidType(B,1);
3483   PetscValidLogicalCollectiveInt(B,bs,2);
3484   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3485   PetscFunctionReturn(0);
3486 }
3487 
3488 
3489 /*@
3490      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3491 
3492      Collective on MPI_Comm
3493 
3494    Input Parameters:
3495 +  comm - must be an MPI communicator of size 1
3496 .  bs - size of block
3497 .  m - number of rows
3498 .  n - number of columns
3499 .  i - row indices
3500 .  j - column indices
3501 -  a - matrix values
3502 
3503    Output Parameter:
3504 .  mat - the matrix
3505 
3506    Level: advanced
3507 
3508    Notes:
3509        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3510     once the matrix is destroyed
3511 
3512        You cannot set new nonzero locations into this matrix, that will generate an error.
3513 
3514        The i and j indices are 0 based
3515 
3516        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3517 
3518       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3519       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3520       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3521       with column-major ordering within blocks.
3522 
3523 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3524 
3525 @*/
3526 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3527 {
3528   PetscErrorCode ierr;
3529   PetscInt       ii;
3530   Mat_SeqBAIJ    *baij;
3531 
3532   PetscFunctionBegin;
3533   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3534   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3535 
3536   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3537   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3538   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3539   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3540   baij = (Mat_SeqBAIJ*)(*mat)->data;
3541   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3542   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3543 
3544   baij->i = i;
3545   baij->j = j;
3546   baij->a = a;
3547 
3548   baij->singlemalloc = PETSC_FALSE;
3549   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3550   baij->free_a       = PETSC_FALSE;
3551   baij->free_ij      = PETSC_FALSE;
3552 
3553   for (ii=0; ii<m; ii++) {
3554     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3555 #if defined(PETSC_USE_DEBUG)
3556     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3557 #endif
3558   }
3559 #if defined(PETSC_USE_DEBUG)
3560   for (ii=0; ii<baij->i[m]; ii++) {
3561     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3562     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3563   }
3564 #endif
3565 
3566   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3567   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3568   PetscFunctionReturn(0);
3569 }
3570 
3571 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3572 {
3573   PetscErrorCode ierr;
3574   PetscMPIInt    size;
3575 
3576   PetscFunctionBegin;
3577   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3578   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3579     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3580   } else {
3581     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3582   }
3583   PetscFunctionReturn(0);
3584 }
3585