xref: /petsc/src/mat/impls/baij/seq/baij.c (revision cf019ec6f319a509de7602f53cdadf853cfc8a83)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,const MatType,MatReuse,Mat*);
17 #endif
18 
19 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
20 {
21   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
22   PetscErrorCode ierr;
23   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
24   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
25   PetscReal      shift = 0.0;
26   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
27 
28   PetscFunctionBegin;
29   allowzeropivot = PetscNot(A->erroriffailure);
30 
31   if (a->idiagvalid) {
32     if (values) *values = a->idiag;
33     PetscFunctionReturn(0);
34   }
35   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
36   diag_offset = a->diag;
37   if (!a->idiag) {
38     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
39     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
40   }
41   diag  = a->idiag;
42   if (values) *values = a->idiag;
43   /* factor and invert each block */
44   switch (bs) {
45   case 1:
46     for (i=0; i<mbs; i++) {
47       odiag    = v + 1*diag_offset[i];
48       diag[0]  = odiag[0];
49 
50       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
51         if (allowzeropivot) {
52           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
53           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
54           A->factorerror_zeropivot_row   = i;
55           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
56         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
57       }
58 
59       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
60       diag    += 1;
61     }
62     break;
63   case 2:
64     for (i=0; i<mbs; i++) {
65       odiag    = v + 4*diag_offset[i];
66       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
67       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
68       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
69       diag    += 4;
70     }
71     break;
72   case 3:
73     for (i=0; i<mbs; i++) {
74       odiag    = v + 9*diag_offset[i];
75       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
76       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
77       diag[8]  = odiag[8];
78       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
79       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
80       diag    += 9;
81     }
82     break;
83   case 4:
84     for (i=0; i<mbs; i++) {
85       odiag  = v + 16*diag_offset[i];
86       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
87       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
88       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
89       diag  += 16;
90     }
91     break;
92   case 5:
93     for (i=0; i<mbs; i++) {
94       odiag  = v + 25*diag_offset[i];
95       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
97       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
98       diag  += 25;
99     }
100     break;
101   case 6:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 36*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
106       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
107       diag  += 36;
108     }
109     break;
110   case 7:
111     for (i=0; i<mbs; i++) {
112       odiag  = v + 49*diag_offset[i];
113       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
114       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
115       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
116       diag  += 49;
117     }
118     break;
119   default:
120     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
121     for (i=0; i<mbs; i++) {
122       odiag  = v + bs2*diag_offset[i];
123       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
124       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
125       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
126       diag  += bs2;
127     }
128     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
129   }
130   a->idiagvalid = PETSC_TRUE;
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
135 {
136   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
137   PetscScalar       *x,*work,*w,*workt,*t;
138   const MatScalar   *v,*aa = a->a, *idiag;
139   const PetscScalar *b,*xb;
140   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
141   PetscErrorCode    ierr;
142   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
143   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
144 
145   PetscFunctionBegin;
146   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
147   its = its*lits;
148   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
149   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
150   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
151   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
152   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
153 
154   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
155 
156   if (!m) PetscFunctionReturn(0);
157   diag  = a->diag;
158   idiag = a->idiag;
159   k    = PetscMax(A->rmap->n,A->cmap->n);
160   if (!a->mult_work) {
161     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
162   }
163   if (!a->sor_workt) {
164     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
165   }
166   if (!a->sor_work) {
167     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
168   }
169   work = a->mult_work;
170   t    = a->sor_workt;
171   w    = a->sor_work;
172 
173   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
174   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
175 
176   if (flag & SOR_ZERO_INITIAL_GUESS) {
177     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
178       switch (bs) {
179       case 1:
180         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
181         t[0] = b[0];
182         i2     = 1;
183         idiag += 1;
184         for (i=1; i<m; i++) {
185           v  = aa + ai[i];
186           vi = aj + ai[i];
187           nz = diag[i] - ai[i];
188           s[0] = b[i2];
189           for (j=0; j<nz; j++) {
190             xw[0] = x[vi[j]];
191             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
192           }
193           t[i2] = s[0];
194           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
195           x[i2]  = xw[0];
196           idiag += 1;
197           i2    += 1;
198         }
199         break;
200       case 2:
201         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
202         t[0] = b[0]; t[1] = b[1];
203         i2     = 2;
204         idiag += 4;
205         for (i=1; i<m; i++) {
206           v  = aa + 4*ai[i];
207           vi = aj + ai[i];
208           nz = diag[i] - ai[i];
209           s[0] = b[i2]; s[1] = b[i2+1];
210           for (j=0; j<nz; j++) {
211             idx = 2*vi[j];
212             it  = 4*j;
213             xw[0] = x[idx]; xw[1] = x[1+idx];
214             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
215           }
216           t[i2] = s[0]; t[i2+1] = s[1];
217           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
218           x[i2]   = xw[0]; x[i2+1] = xw[1];
219           idiag  += 4;
220           i2     += 2;
221         }
222         break;
223       case 3:
224         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
225         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
226         i2     = 3;
227         idiag += 9;
228         for (i=1; i<m; i++) {
229           v  = aa + 9*ai[i];
230           vi = aj + ai[i];
231           nz = diag[i] - ai[i];
232           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
233           while (nz--) {
234             idx = 3*(*vi++);
235             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
236             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
237             v  += 9;
238           }
239           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
240           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
241           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
242           idiag  += 9;
243           i2     += 3;
244         }
245         break;
246       case 4:
247         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
248         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
249         i2     = 4;
250         idiag += 16;
251         for (i=1; i<m; i++) {
252           v  = aa + 16*ai[i];
253           vi = aj + ai[i];
254           nz = diag[i] - ai[i];
255           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
256           while (nz--) {
257             idx = 4*(*vi++);
258             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
259             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
260             v  += 16;
261           }
262           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
263           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
264           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
265           idiag  += 16;
266           i2     += 4;
267         }
268         break;
269       case 5:
270         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
271         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
272         i2     = 5;
273         idiag += 25;
274         for (i=1; i<m; i++) {
275           v  = aa + 25*ai[i];
276           vi = aj + ai[i];
277           nz = diag[i] - ai[i];
278           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
279           while (nz--) {
280             idx = 5*(*vi++);
281             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
282             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
283             v  += 25;
284           }
285           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
286           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
287           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
288           idiag  += 25;
289           i2     += 5;
290         }
291         break;
292       case 6:
293         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
294         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
295         i2     = 6;
296         idiag += 36;
297         for (i=1; i<m; i++) {
298           v  = aa + 36*ai[i];
299           vi = aj + ai[i];
300           nz = diag[i] - ai[i];
301           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
302           while (nz--) {
303             idx = 6*(*vi++);
304             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
305             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
306             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
307             v  += 36;
308           }
309           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
310           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
311           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
312           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
313           idiag  += 36;
314           i2     += 6;
315         }
316         break;
317       case 7:
318         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
319         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
320         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
321         i2     = 7;
322         idiag += 49;
323         for (i=1; i<m; i++) {
324           v  = aa + 49*ai[i];
325           vi = aj + ai[i];
326           nz = diag[i] - ai[i];
327           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
328           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
329           while (nz--) {
330             idx = 7*(*vi++);
331             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
332             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
333             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
334             v  += 49;
335           }
336           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
337           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
338           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
339           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
340           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
341           idiag  += 49;
342           i2     += 7;
343         }
344         break;
345       default:
346         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
347         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
348         i2     = bs;
349         idiag += bs2;
350         for (i=1; i<m; i++) {
351           v  = aa + bs2*ai[i];
352           vi = aj + ai[i];
353           nz = diag[i] - ai[i];
354 
355           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
356           /* copy all rows of x that are needed into contiguous space */
357           workt = work;
358           for (j=0; j<nz; j++) {
359             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
360             workt += bs;
361           }
362           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
363           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
364           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
365 
366           idiag += bs2;
367           i2    += bs;
368         }
369         break;
370       }
371       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
372       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
373       xb = t;
374     }
375     else xb = b;
376     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
377       idiag = a->idiag+bs2*(a->mbs-1);
378       i2 = bs * (m-1);
379       switch (bs) {
380       case 1:
381         s[0]  = xb[i2];
382         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
383         x[i2] = xw[0];
384         i2   -= 1;
385         for (i=m-2; i>=0; i--) {
386           v  = aa + (diag[i]+1);
387           vi = aj + diag[i] + 1;
388           nz = ai[i+1] - diag[i] - 1;
389           s[0] = xb[i2];
390           for (j=0; j<nz; j++) {
391             xw[0] = x[vi[j]];
392             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
393           }
394           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
395           x[i2]  = xw[0];
396           idiag -= 1;
397           i2    -= 1;
398         }
399         break;
400       case 2:
401         s[0]  = xb[i2]; s[1] = xb[i2+1];
402         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
403         x[i2] = xw[0]; x[i2+1] = xw[1];
404         i2    -= 2;
405         idiag -= 4;
406         for (i=m-2; i>=0; i--) {
407           v  = aa + 4*(diag[i] + 1);
408           vi = aj + diag[i] + 1;
409           nz = ai[i+1] - diag[i] - 1;
410           s[0] = xb[i2]; s[1] = xb[i2+1];
411           for (j=0; j<nz; j++) {
412             idx = 2*vi[j];
413             it  = 4*j;
414             xw[0] = x[idx]; xw[1] = x[1+idx];
415             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
416           }
417           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
418           x[i2]   = xw[0]; x[i2+1] = xw[1];
419           idiag  -= 4;
420           i2     -= 2;
421         }
422         break;
423       case 3:
424         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
425         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
426         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
427         i2    -= 3;
428         idiag -= 9;
429         for (i=m-2; i>=0; i--) {
430           v  = aa + 9*(diag[i]+1);
431           vi = aj + diag[i] + 1;
432           nz = ai[i+1] - diag[i] - 1;
433           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
434           while (nz--) {
435             idx = 3*(*vi++);
436             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
437             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
438             v  += 9;
439           }
440           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
441           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
442           idiag  -= 9;
443           i2     -= 3;
444         }
445         break;
446       case 4:
447         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
448         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
449         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
450         i2    -= 4;
451         idiag -= 16;
452         for (i=m-2; i>=0; i--) {
453           v  = aa + 16*(diag[i]+1);
454           vi = aj + diag[i] + 1;
455           nz = ai[i+1] - diag[i] - 1;
456           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
457           while (nz--) {
458             idx = 4*(*vi++);
459             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
460             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
461             v  += 16;
462           }
463           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
464           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
465           idiag  -= 16;
466           i2     -= 4;
467         }
468         break;
469       case 5:
470         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
471         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
472         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
473         i2    -= 5;
474         idiag -= 25;
475         for (i=m-2; i>=0; i--) {
476           v  = aa + 25*(diag[i]+1);
477           vi = aj + diag[i] + 1;
478           nz = ai[i+1] - diag[i] - 1;
479           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
480           while (nz--) {
481             idx = 5*(*vi++);
482             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
483             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
484             v  += 25;
485           }
486           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
487           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
488           idiag  -= 25;
489           i2     -= 5;
490         }
491         break;
492       case 6:
493         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
494         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
495         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
496         i2    -= 6;
497         idiag -= 36;
498         for (i=m-2; i>=0; i--) {
499           v  = aa + 36*(diag[i]+1);
500           vi = aj + diag[i] + 1;
501           nz = ai[i+1] - diag[i] - 1;
502           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
503           while (nz--) {
504             idx = 6*(*vi++);
505             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
506             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
507             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
508             v  += 36;
509           }
510           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
511           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
512           idiag  -= 36;
513           i2     -= 6;
514         }
515         break;
516       case 7:
517         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
518         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
519         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
520         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
521         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
522         i2    -= 7;
523         idiag -= 49;
524         for (i=m-2; i>=0; i--) {
525           v  = aa + 49*(diag[i]+1);
526           vi = aj + diag[i] + 1;
527           nz = ai[i+1] - diag[i] - 1;
528           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
529           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
530           while (nz--) {
531             idx = 7*(*vi++);
532             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
533             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
534             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
535             v  += 49;
536           }
537           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
538           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
539           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
540           idiag  -= 49;
541           i2     -= 7;
542         }
543         break;
544       default:
545         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
546         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
547         i2    -= bs;
548         idiag -= bs2;
549         for (i=m-2; i>=0; i--) {
550           v  = aa + bs2*(diag[i]+1);
551           vi = aj + diag[i] + 1;
552           nz = ai[i+1] - diag[i] - 1;
553 
554           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
555           /* copy all rows of x that are needed into contiguous space */
556           workt = work;
557           for (j=0; j<nz; j++) {
558             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
559             workt += bs;
560           }
561           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
562           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
563 
564           idiag -= bs2;
565           i2    -= bs;
566         }
567         break;
568       }
569       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
570     }
571     its--;
572   }
573   while (its--) {
574     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
575       idiag = a->idiag;
576       i2 = 0;
577       switch (bs) {
578       case 1:
579         for (i=0; i<m; i++) {
580           v  = aa + ai[i];
581           vi = aj + ai[i];
582           nz = ai[i+1] - ai[i];
583           s[0] = b[i2];
584           for (j=0; j<nz; j++) {
585             xw[0] = x[vi[j]];
586             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
587           }
588           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
589           x[i2] += xw[0];
590           idiag += 1;
591           i2    += 1;
592         }
593         break;
594       case 2:
595         for (i=0; i<m; i++) {
596           v  = aa + 4*ai[i];
597           vi = aj + ai[i];
598           nz = ai[i+1] - ai[i];
599           s[0] = b[i2]; s[1] = b[i2+1];
600           for (j=0; j<nz; j++) {
601             idx = 2*vi[j];
602             it  = 4*j;
603             xw[0] = x[idx]; xw[1] = x[1+idx];
604             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
605           }
606           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
607           x[i2]  += xw[0]; x[i2+1] += xw[1];
608           idiag  += 4;
609           i2     += 2;
610         }
611         break;
612       case 3:
613         for (i=0; i<m; i++) {
614           v  = aa + 9*ai[i];
615           vi = aj + ai[i];
616           nz = ai[i+1] - ai[i];
617           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
618           while (nz--) {
619             idx = 3*(*vi++);
620             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
621             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
622             v  += 9;
623           }
624           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
625           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
626           idiag  += 9;
627           i2     += 3;
628         }
629         break;
630       case 4:
631         for (i=0; i<m; i++) {
632           v  = aa + 16*ai[i];
633           vi = aj + ai[i];
634           nz = ai[i+1] - ai[i];
635           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
636           while (nz--) {
637             idx = 4*(*vi++);
638             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
639             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
640             v  += 16;
641           }
642           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
643           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
644           idiag  += 16;
645           i2     += 4;
646         }
647         break;
648       case 5:
649         for (i=0; i<m; i++) {
650           v  = aa + 25*ai[i];
651           vi = aj + ai[i];
652           nz = ai[i+1] - ai[i];
653           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
654           while (nz--) {
655             idx = 5*(*vi++);
656             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
657             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
658             v  += 25;
659           }
660           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
661           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
662           idiag  += 25;
663           i2     += 5;
664         }
665         break;
666       case 6:
667         for (i=0; i<m; i++) {
668           v  = aa + 36*ai[i];
669           vi = aj + ai[i];
670           nz = ai[i+1] - ai[i];
671           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
672           while (nz--) {
673             idx = 6*(*vi++);
674             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
675             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
676             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
677             v  += 36;
678           }
679           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
680           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
681           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
682           idiag  += 36;
683           i2     += 6;
684         }
685         break;
686       case 7:
687         for (i=0; i<m; i++) {
688           v  = aa + 49*ai[i];
689           vi = aj + ai[i];
690           nz = ai[i+1] - ai[i];
691           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
692           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
693           while (nz--) {
694             idx = 7*(*vi++);
695             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
696             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
697             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
698             v  += 49;
699           }
700           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
701           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
702           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
703           idiag  += 49;
704           i2     += 7;
705         }
706         break;
707       default:
708         for (i=0; i<m; i++) {
709           v  = aa + bs2*ai[i];
710           vi = aj + ai[i];
711           nz = ai[i+1] - ai[i];
712 
713           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
714           /* copy all rows of x that are needed into contiguous space */
715           workt = work;
716           for (j=0; j<nz; j++) {
717             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
718             workt += bs;
719           }
720           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
721           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
722 
723           idiag += bs2;
724           i2    += bs;
725         }
726         break;
727       }
728       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
729     }
730     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
731       idiag = a->idiag+bs2*(a->mbs-1);
732       i2 = bs * (m-1);
733       switch (bs) {
734       case 1:
735         for (i=m-1; i>=0; i--) {
736           v  = aa + ai[i];
737           vi = aj + ai[i];
738           nz = ai[i+1] - ai[i];
739           s[0] = b[i2];
740           for (j=0; j<nz; j++) {
741             xw[0] = x[vi[j]];
742             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
743           }
744           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
745           x[i2] += xw[0];
746           idiag -= 1;
747           i2    -= 1;
748         }
749         break;
750       case 2:
751         for (i=m-1; i>=0; i--) {
752           v  = aa + 4*ai[i];
753           vi = aj + ai[i];
754           nz = ai[i+1] - ai[i];
755           s[0] = b[i2]; s[1] = b[i2+1];
756           for (j=0; j<nz; j++) {
757             idx = 2*vi[j];
758             it  = 4*j;
759             xw[0] = x[idx]; xw[1] = x[1+idx];
760             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
761           }
762           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
763           x[i2]  += xw[0]; x[i2+1] += xw[1];
764           idiag  -= 4;
765           i2     -= 2;
766         }
767         break;
768       case 3:
769         for (i=m-1; i>=0; i--) {
770           v  = aa + 9*ai[i];
771           vi = aj + ai[i];
772           nz = ai[i+1] - ai[i];
773           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
774           while (nz--) {
775             idx = 3*(*vi++);
776             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
777             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
778             v  += 9;
779           }
780           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
781           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
782           idiag  -= 9;
783           i2     -= 3;
784         }
785         break;
786       case 4:
787         for (i=m-1; i>=0; i--) {
788           v  = aa + 16*ai[i];
789           vi = aj + ai[i];
790           nz = ai[i+1] - ai[i];
791           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
792           while (nz--) {
793             idx = 4*(*vi++);
794             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
795             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
796             v  += 16;
797           }
798           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
799           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
800           idiag  -= 16;
801           i2     -= 4;
802         }
803         break;
804       case 5:
805         for (i=m-1; i>=0; i--) {
806           v  = aa + 25*ai[i];
807           vi = aj + ai[i];
808           nz = ai[i+1] - ai[i];
809           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
810           while (nz--) {
811             idx = 5*(*vi++);
812             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
813             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
814             v  += 25;
815           }
816           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
817           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
818           idiag  -= 25;
819           i2     -= 5;
820         }
821         break;
822       case 6:
823         for (i=m-1; i>=0; i--) {
824           v  = aa + 36*ai[i];
825           vi = aj + ai[i];
826           nz = ai[i+1] - ai[i];
827           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
828           while (nz--) {
829             idx = 6*(*vi++);
830             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
831             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
832             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
833             v  += 36;
834           }
835           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
836           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
837           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
838           idiag  -= 36;
839           i2     -= 6;
840         }
841         break;
842       case 7:
843         for (i=m-1; i>=0; i--) {
844           v  = aa + 49*ai[i];
845           vi = aj + ai[i];
846           nz = ai[i+1] - ai[i];
847           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
848           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
849           while (nz--) {
850             idx = 7*(*vi++);
851             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
852             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
853             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
854             v  += 49;
855           }
856           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
857           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
858           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
859           idiag  -= 49;
860           i2     -= 7;
861         }
862         break;
863       default:
864         for (i=m-1; i>=0; i--) {
865           v  = aa + bs2*ai[i];
866           vi = aj + ai[i];
867           nz = ai[i+1] - ai[i];
868 
869           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
870           /* copy all rows of x that are needed into contiguous space */
871           workt = work;
872           for (j=0; j<nz; j++) {
873             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
874             workt += bs;
875           }
876           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
877           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
878 
879           idiag -= bs2;
880           i2    -= bs;
881         }
882         break;
883       }
884       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
885     }
886   }
887   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
888   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
889   PetscFunctionReturn(0);
890 }
891 
892 
893 /*
894     Special version for direct calls from Fortran (Used in PETSc-fun3d)
895 */
896 #if defined(PETSC_HAVE_FORTRAN_CAPS)
897 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
898 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
899 #define matsetvaluesblocked4_ matsetvaluesblocked4
900 #endif
901 
902 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
903 {
904   Mat               A  = *AA;
905   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
906   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
907   PetscInt          *ai    =a->i,*ailen=a->ilen;
908   PetscInt          *aj    =a->j,stepval,lastcol = -1;
909   const PetscScalar *value = v;
910   MatScalar         *ap,*aa = a->a,*bap;
911 
912   PetscFunctionBegin;
913   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
914   stepval = (n-1)*4;
915   for (k=0; k<m; k++) { /* loop over added rows */
916     row  = im[k];
917     rp   = aj + ai[row];
918     ap   = aa + 16*ai[row];
919     nrow = ailen[row];
920     low  = 0;
921     high = nrow;
922     for (l=0; l<n; l++) { /* loop over added columns */
923       col = in[l];
924       if (col <= lastcol)  low = 0;
925       else                high = nrow;
926       lastcol = col;
927       value   = v + k*(stepval+4 + l)*4;
928       while (high-low > 7) {
929         t = (low+high)/2;
930         if (rp[t] > col) high = t;
931         else             low  = t;
932       }
933       for (i=low; i<high; i++) {
934         if (rp[i] > col) break;
935         if (rp[i] == col) {
936           bap = ap +  16*i;
937           for (ii=0; ii<4; ii++,value+=stepval) {
938             for (jj=ii; jj<16; jj+=4) {
939               bap[jj] += *value++;
940             }
941           }
942           goto noinsert2;
943         }
944       }
945       N = nrow++ - 1;
946       high++; /* added new column index thus must search to one higher than before */
947       /* shift up all the later entries in this row */
948       for (ii=N; ii>=i; ii--) {
949         rp[ii+1] = rp[ii];
950         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
951       }
952       if (N >= i) {
953         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
954       }
955       rp[i] = col;
956       bap   = ap +  16*i;
957       for (ii=0; ii<4; ii++,value+=stepval) {
958         for (jj=ii; jj<16; jj+=4) {
959           bap[jj] = *value++;
960         }
961       }
962       noinsert2:;
963       low = i;
964     }
965     ailen[row] = nrow;
966   }
967   PetscFunctionReturnVoid();
968 }
969 
970 #if defined(PETSC_HAVE_FORTRAN_CAPS)
971 #define matsetvalues4_ MATSETVALUES4
972 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
973 #define matsetvalues4_ matsetvalues4
974 #endif
975 
976 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
977 {
978   Mat         A  = *AA;
979   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
980   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
981   PetscInt    *ai=a->i,*ailen=a->ilen;
982   PetscInt    *aj=a->j,brow,bcol;
983   PetscInt    ridx,cidx,lastcol = -1;
984   MatScalar   *ap,value,*aa=a->a,*bap;
985 
986   PetscFunctionBegin;
987   for (k=0; k<m; k++) { /* loop over added rows */
988     row  = im[k]; brow = row/4;
989     rp   = aj + ai[brow];
990     ap   = aa + 16*ai[brow];
991     nrow = ailen[brow];
992     low  = 0;
993     high = nrow;
994     for (l=0; l<n; l++) { /* loop over added columns */
995       col   = in[l]; bcol = col/4;
996       ridx  = row % 4; cidx = col % 4;
997       value = v[l + k*n];
998       if (col <= lastcol)  low = 0;
999       else                high = nrow;
1000       lastcol = col;
1001       while (high-low > 7) {
1002         t = (low+high)/2;
1003         if (rp[t] > bcol) high = t;
1004         else              low  = t;
1005       }
1006       for (i=low; i<high; i++) {
1007         if (rp[i] > bcol) break;
1008         if (rp[i] == bcol) {
1009           bap   = ap +  16*i + 4*cidx + ridx;
1010           *bap += value;
1011           goto noinsert1;
1012         }
1013       }
1014       N = nrow++ - 1;
1015       high++; /* added new column thus must search to one higher than before */
1016       /* shift up all the later entries in this row */
1017       for (ii=N; ii>=i; ii--) {
1018         rp[ii+1] = rp[ii];
1019         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1020       }
1021       if (N>=i) {
1022         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1023       }
1024       rp[i]                    = bcol;
1025       ap[16*i + 4*cidx + ridx] = value;
1026 noinsert1:;
1027       low = i;
1028     }
1029     ailen[brow] = nrow;
1030   }
1031   PetscFunctionReturnVoid();
1032 }
1033 
1034 /*
1035      Checks for missing diagonals
1036 */
1037 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1038 {
1039   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1040   PetscErrorCode ierr;
1041   PetscInt       *diag,*ii = a->i,i;
1042 
1043   PetscFunctionBegin;
1044   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1045   *missing = PETSC_FALSE;
1046   if (A->rmap->n > 0 && !ii) {
1047     *missing = PETSC_TRUE;
1048     if (d) *d = 0;
1049     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1050   } else {
1051     diag = a->diag;
1052     for (i=0; i<a->mbs; i++) {
1053       if (diag[i] >= ii[i+1]) {
1054         *missing = PETSC_TRUE;
1055         if (d) *d = i;
1056         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1057         break;
1058       }
1059     }
1060   }
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1065 {
1066   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1067   PetscErrorCode ierr;
1068   PetscInt       i,j,m = a->mbs;
1069 
1070   PetscFunctionBegin;
1071   if (!a->diag) {
1072     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1073     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1074     a->free_diag = PETSC_TRUE;
1075   }
1076   for (i=0; i<m; i++) {
1077     a->diag[i] = a->i[i+1];
1078     for (j=a->i[i]; j<a->i[i+1]; j++) {
1079       if (a->j[j] == i) {
1080         a->diag[i] = j;
1081         break;
1082       }
1083     }
1084   }
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 
1089 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1090 {
1091   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1092   PetscErrorCode ierr;
1093   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1094   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1095 
1096   PetscFunctionBegin;
1097   *nn = n;
1098   if (!ia) PetscFunctionReturn(0);
1099   if (symmetric) {
1100     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1101     nz   = tia[n];
1102   } else {
1103     tia = a->i; tja = a->j;
1104   }
1105 
1106   if (!blockcompressed && bs > 1) {
1107     (*nn) *= bs;
1108     /* malloc & create the natural set of indices */
1109     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1110     if (n) {
1111       (*ia)[0] = oshift;
1112       for (j=1; j<bs; j++) {
1113         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1114       }
1115     }
1116 
1117     for (i=1; i<n; i++) {
1118       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1119       for (j=1; j<bs; j++) {
1120         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1121       }
1122     }
1123     if (n) {
1124       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1125     }
1126 
1127     if (inja) {
1128       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1129       cnt = 0;
1130       for (i=0; i<n; i++) {
1131         for (j=0; j<bs; j++) {
1132           for (k=tia[i]; k<tia[i+1]; k++) {
1133             for (l=0; l<bs; l++) {
1134               (*ja)[cnt++] = bs*tja[k] + l;
1135             }
1136           }
1137         }
1138       }
1139     }
1140 
1141     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1142       ierr = PetscFree(tia);CHKERRQ(ierr);
1143       ierr = PetscFree(tja);CHKERRQ(ierr);
1144     }
1145   } else if (oshift == 1) {
1146     if (symmetric) {
1147       nz = tia[A->rmap->n/bs];
1148       /*  add 1 to i and j indices */
1149       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1150       *ia = tia;
1151       if (ja) {
1152         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1153         *ja = tja;
1154       }
1155     } else {
1156       nz = a->i[A->rmap->n/bs];
1157       /* malloc space and  add 1 to i and j indices */
1158       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1159       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1160       if (ja) {
1161         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1162         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1163       }
1164     }
1165   } else {
1166     *ia = tia;
1167     if (ja) *ja = tja;
1168   }
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1173 {
1174   PetscErrorCode ierr;
1175 
1176   PetscFunctionBegin;
1177   if (!ia) PetscFunctionReturn(0);
1178   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1179     ierr = PetscFree(*ia);CHKERRQ(ierr);
1180     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1181   }
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1186 {
1187   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1188   PetscErrorCode ierr;
1189 
1190   PetscFunctionBegin;
1191 #if defined(PETSC_USE_LOG)
1192   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1193 #endif
1194   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1195   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1196   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1197   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1198   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1199   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1200   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1201   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1202   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1203   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1204   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1205   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1206   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1207 
1208   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1209   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1210   ierr = PetscFree(A->data);CHKERRQ(ierr);
1211 
1212   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1213   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1214   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1223 #if defined(PETSC_HAVE_HYPRE)
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1225 #endif
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1230 {
1231   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1232   PetscErrorCode ierr;
1233 
1234   PetscFunctionBegin;
1235   switch (op) {
1236   case MAT_ROW_ORIENTED:
1237     a->roworiented = flg;
1238     break;
1239   case MAT_KEEP_NONZERO_PATTERN:
1240     a->keepnonzeropattern = flg;
1241     break;
1242   case MAT_NEW_NONZERO_LOCATIONS:
1243     a->nonew = (flg ? 0 : 1);
1244     break;
1245   case MAT_NEW_NONZERO_LOCATION_ERR:
1246     a->nonew = (flg ? -1 : 0);
1247     break;
1248   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1249     a->nonew = (flg ? -2 : 0);
1250     break;
1251   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1252     a->nounused = (flg ? -1 : 0);
1253     break;
1254   case MAT_NEW_DIAGONALS:
1255   case MAT_IGNORE_OFF_PROC_ENTRIES:
1256   case MAT_USE_HASH_TABLE:
1257     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1258     break;
1259   case MAT_SPD:
1260   case MAT_SYMMETRIC:
1261   case MAT_STRUCTURALLY_SYMMETRIC:
1262   case MAT_HERMITIAN:
1263   case MAT_SYMMETRY_ETERNAL:
1264   case MAT_SUBMAT_SINGLEIS:
1265   case MAT_STRUCTURE_ONLY:
1266     /* These options are handled directly by MatSetOption() */
1267     break;
1268   default:
1269     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1270   }
1271   PetscFunctionReturn(0);
1272 }
1273 
1274 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1275 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1276 {
1277   PetscErrorCode ierr;
1278   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1279   MatScalar      *aa_i;
1280   PetscScalar    *v_i;
1281 
1282   PetscFunctionBegin;
1283   bs  = A->rmap->bs;
1284   bs2 = bs*bs;
1285   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1286 
1287   bn  = row/bs;   /* Block number */
1288   bp  = row % bs; /* Block Position */
1289   M   = ai[bn+1] - ai[bn];
1290   *nz = bs*M;
1291 
1292   if (v) {
1293     *v = 0;
1294     if (*nz) {
1295       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1296       for (i=0; i<M; i++) { /* for each block in the block row */
1297         v_i  = *v + i*bs;
1298         aa_i = aa + bs2*(ai[bn] + i);
1299         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1300       }
1301     }
1302   }
1303 
1304   if (idx) {
1305     *idx = 0;
1306     if (*nz) {
1307       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1308       for (i=0; i<M; i++) { /* for each block in the block row */
1309         idx_i = *idx + i*bs;
1310         itmp  = bs*aj[ai[bn] + i];
1311         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1312       }
1313     }
1314   }
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1319 {
1320   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1321   PetscErrorCode ierr;
1322 
1323   PetscFunctionBegin;
1324   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1325   PetscFunctionReturn(0);
1326 }
1327 
1328 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1329 {
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1334   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1339 {
1340   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1341   Mat            C;
1342   PetscErrorCode ierr;
1343   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1344   PetscInt       *rows,*cols,bs2=a->bs2;
1345   MatScalar      *array;
1346 
1347   PetscFunctionBegin;
1348   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1349     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1350 
1351     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1352     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1353     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1354     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1355     ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr);
1356     ierr = PetscFree(col);CHKERRQ(ierr);
1357   } else {
1358     C = *B;
1359   }
1360 
1361   array = a->a;
1362   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1363   for (i=0; i<mbs; i++) {
1364     cols[0] = i*bs;
1365     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1366     len = ai[i+1] - ai[i];
1367     for (j=0; j<len; j++) {
1368       rows[0] = (*aj++)*bs;
1369       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1370       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1371       array += bs2;
1372     }
1373   }
1374   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1375 
1376   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1377   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1378 
1379   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1380     *B = C;
1381   } else {
1382     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1383   }
1384   PetscFunctionReturn(0);
1385 }
1386 
1387 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1388 {
1389   PetscErrorCode ierr;
1390   Mat            Btrans;
1391 
1392   PetscFunctionBegin;
1393   *f   = PETSC_FALSE;
1394   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1395   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1396   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1401 {
1402   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1403   PetscErrorCode ierr;
1404   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1405   int            fd;
1406   PetscScalar    *aa;
1407   FILE           *file;
1408 
1409   PetscFunctionBegin;
1410   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1411   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1412   col_lens[0] = MAT_FILE_CLASSID;
1413 
1414   col_lens[1] = A->rmap->N;
1415   col_lens[2] = A->cmap->n;
1416   col_lens[3] = a->nz*bs2;
1417 
1418   /* store lengths of each row and write (including header) to file */
1419   count = 0;
1420   for (i=0; i<a->mbs; i++) {
1421     for (j=0; j<bs; j++) {
1422       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1423     }
1424   }
1425   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1426   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1427 
1428   /* store column indices (zero start index) */
1429   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1430   count = 0;
1431   for (i=0; i<a->mbs; i++) {
1432     for (j=0; j<bs; j++) {
1433       for (k=a->i[i]; k<a->i[i+1]; k++) {
1434         for (l=0; l<bs; l++) {
1435           jj[count++] = bs*a->j[k] + l;
1436         }
1437       }
1438     }
1439   }
1440   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1441   ierr = PetscFree(jj);CHKERRQ(ierr);
1442 
1443   /* store nonzero values */
1444   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1445   count = 0;
1446   for (i=0; i<a->mbs; i++) {
1447     for (j=0; j<bs; j++) {
1448       for (k=a->i[i]; k<a->i[i+1]; k++) {
1449         for (l=0; l<bs; l++) {
1450           aa[count++] = a->a[bs2*k + l*bs + j];
1451         }
1452       }
1453     }
1454   }
1455   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1456   ierr = PetscFree(aa);CHKERRQ(ierr);
1457 
1458   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1459   if (file) {
1460     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1461   }
1462   PetscFunctionReturn(0);
1463 }
1464 
1465 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1466 {
1467   PetscErrorCode ierr;
1468   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1469   PetscInt       i,bs = A->rmap->bs,k;
1470 
1471   PetscFunctionBegin;
1472   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1473   for (i=0; i<a->mbs; i++) {
1474     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1475     for (k=a->i[i]; k<a->i[i+1]; k++) {
1476       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1477     }
1478     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1479   }
1480   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1481   PetscFunctionReturn(0);
1482 }
1483 
1484 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1485 {
1486   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1487   PetscErrorCode    ierr;
1488   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1489   PetscViewerFormat format;
1490 
1491   PetscFunctionBegin;
1492   if (A->structure_only) {
1493     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1494     PetscFunctionReturn(0);
1495   }
1496 
1497   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1498   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1499     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1500   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1501     const char *matname;
1502     Mat        aij;
1503     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1504     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1505     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1506     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1507     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1508   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1509       PetscFunctionReturn(0);
1510   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1511     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1512     for (i=0; i<a->mbs; i++) {
1513       for (j=0; j<bs; j++) {
1514         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1515         for (k=a->i[i]; k<a->i[i+1]; k++) {
1516           for (l=0; l<bs; l++) {
1517 #if defined(PETSC_USE_COMPLEX)
1518             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1519               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1520                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1521             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1522               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1523                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1524             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1525               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1526             }
1527 #else
1528             if (a->a[bs2*k + l*bs + j] != 0.0) {
1529               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1530             }
1531 #endif
1532           }
1533         }
1534         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1535       }
1536     }
1537     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1538   } else {
1539     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1540     for (i=0; i<a->mbs; i++) {
1541       for (j=0; j<bs; j++) {
1542         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1543         for (k=a->i[i]; k<a->i[i+1]; k++) {
1544           for (l=0; l<bs; l++) {
1545 #if defined(PETSC_USE_COMPLEX)
1546             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1547               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1548                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1549             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1550               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1551                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1552             } else {
1553               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1554             }
1555 #else
1556             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1557 #endif
1558           }
1559         }
1560         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1561       }
1562     }
1563     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1564   }
1565   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1566   PetscFunctionReturn(0);
1567 }
1568 
1569 #include <petscdraw.h>
1570 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1571 {
1572   Mat               A = (Mat) Aa;
1573   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1574   PetscErrorCode    ierr;
1575   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1576   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1577   MatScalar         *aa;
1578   PetscViewer       viewer;
1579   PetscViewerFormat format;
1580 
1581   PetscFunctionBegin;
1582   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1583   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1584   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1585 
1586   /* loop over matrix elements drawing boxes */
1587 
1588   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1589     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1590     /* Blue for negative, Cyan for zero and  Red for positive */
1591     color = PETSC_DRAW_BLUE;
1592     for (i=0,row=0; i<mbs; i++,row+=bs) {
1593       for (j=a->i[i]; j<a->i[i+1]; j++) {
1594         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1595         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1596         aa  = a->a + j*bs2;
1597         for (k=0; k<bs; k++) {
1598           for (l=0; l<bs; l++) {
1599             if (PetscRealPart(*aa++) >=  0.) continue;
1600             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1601           }
1602         }
1603       }
1604     }
1605     color = PETSC_DRAW_CYAN;
1606     for (i=0,row=0; i<mbs; i++,row+=bs) {
1607       for (j=a->i[i]; j<a->i[i+1]; j++) {
1608         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1609         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1610         aa  = a->a + j*bs2;
1611         for (k=0; k<bs; k++) {
1612           for (l=0; l<bs; l++) {
1613             if (PetscRealPart(*aa++) != 0.) continue;
1614             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1615           }
1616         }
1617       }
1618     }
1619     color = PETSC_DRAW_RED;
1620     for (i=0,row=0; i<mbs; i++,row+=bs) {
1621       for (j=a->i[i]; j<a->i[i+1]; j++) {
1622         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1623         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1624         aa  = a->a + j*bs2;
1625         for (k=0; k<bs; k++) {
1626           for (l=0; l<bs; l++) {
1627             if (PetscRealPart(*aa++) <= 0.) continue;
1628             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1629           }
1630         }
1631       }
1632     }
1633     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1634   } else {
1635     /* use contour shading to indicate magnitude of values */
1636     /* first determine max of all nonzero values */
1637     PetscReal minv = 0.0, maxv = 0.0;
1638     PetscDraw popup;
1639 
1640     for (i=0; i<a->nz*a->bs2; i++) {
1641       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1642     }
1643     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1644     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1645     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1646 
1647     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1648     for (i=0,row=0; i<mbs; i++,row+=bs) {
1649       for (j=a->i[i]; j<a->i[i+1]; j++) {
1650         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1651         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1652         aa  = a->a + j*bs2;
1653         for (k=0; k<bs; k++) {
1654           for (l=0; l<bs; l++) {
1655             MatScalar v = *aa++;
1656             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1657             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1658           }
1659         }
1660       }
1661     }
1662     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1663   }
1664   PetscFunctionReturn(0);
1665 }
1666 
1667 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1668 {
1669   PetscErrorCode ierr;
1670   PetscReal      xl,yl,xr,yr,w,h;
1671   PetscDraw      draw;
1672   PetscBool      isnull;
1673 
1674   PetscFunctionBegin;
1675   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1676   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1677   if (isnull) PetscFunctionReturn(0);
1678 
1679   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1680   xr  += w;          yr += h;        xl = -w;     yl = -h;
1681   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1682   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1683   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1684   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1685   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1686   PetscFunctionReturn(0);
1687 }
1688 
1689 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1690 {
1691   PetscErrorCode ierr;
1692   PetscBool      iascii,isbinary,isdraw;
1693 
1694   PetscFunctionBegin;
1695   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1696   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1697   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1698   if (iascii) {
1699     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1700   } else if (isbinary) {
1701     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1702   } else if (isdraw) {
1703     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1704   } else {
1705     Mat B;
1706     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1707     ierr = MatView(B,viewer);CHKERRQ(ierr);
1708     ierr = MatDestroy(&B);CHKERRQ(ierr);
1709   }
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 
1714 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1715 {
1716   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1717   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1718   PetscInt    *ai = a->i,*ailen = a->ilen;
1719   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1720   MatScalar   *ap,*aa = a->a;
1721 
1722   PetscFunctionBegin;
1723   for (k=0; k<m; k++) { /* loop over rows */
1724     row = im[k]; brow = row/bs;
1725     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1726     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1727     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1728     nrow = ailen[brow];
1729     for (l=0; l<n; l++) { /* loop over columns */
1730       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1731       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1732       col  = in[l];
1733       bcol = col/bs;
1734       cidx = col%bs;
1735       ridx = row%bs;
1736       high = nrow;
1737       low  = 0; /* assume unsorted */
1738       while (high-low > 5) {
1739         t = (low+high)/2;
1740         if (rp[t] > bcol) high = t;
1741         else             low  = t;
1742       }
1743       for (i=low; i<high; i++) {
1744         if (rp[i] > bcol) break;
1745         if (rp[i] == bcol) {
1746           *v++ = ap[bs2*i+bs*cidx+ridx];
1747           goto finished;
1748         }
1749       }
1750       *v++ = 0.0;
1751 finished:;
1752     }
1753   }
1754   PetscFunctionReturn(0);
1755 }
1756 
1757 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1758 {
1759   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1760   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1761   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1762   PetscErrorCode    ierr;
1763   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1764   PetscBool         roworiented=a->roworiented;
1765   const PetscScalar *value     = v;
1766   MatScalar         *ap=NULL,*aa = a->a,*bap;
1767 
1768   PetscFunctionBegin;
1769   if (roworiented) {
1770     stepval = (n-1)*bs;
1771   } else {
1772     stepval = (m-1)*bs;
1773   }
1774   for (k=0; k<m; k++) { /* loop over added rows */
1775     row = im[k];
1776     if (row < 0) continue;
1777 #if defined(PETSC_USE_DEBUG)
1778     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1779 #endif
1780     rp   = aj + ai[row];
1781     if (!A->structure_only) ap = aa + bs2*ai[row];
1782     rmax = imax[row];
1783     nrow = ailen[row];
1784     low  = 0;
1785     high = nrow;
1786     for (l=0; l<n; l++) { /* loop over added columns */
1787       if (in[l] < 0) continue;
1788 #if defined(PETSC_USE_DEBUG)
1789       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1790 #endif
1791       col = in[l];
1792       if (!A->structure_only) {
1793         if (roworiented) {
1794           value = v + (k*(stepval+bs) + l)*bs;
1795         } else {
1796           value = v + (l*(stepval+bs) + k)*bs;
1797         }
1798       }
1799       if (col <= lastcol) low = 0;
1800       else high = nrow;
1801       lastcol = col;
1802       while (high-low > 7) {
1803         t = (low+high)/2;
1804         if (rp[t] > col) high = t;
1805         else             low  = t;
1806       }
1807       for (i=low; i<high; i++) {
1808         if (rp[i] > col) break;
1809         if (rp[i] == col) {
1810           if (A->structure_only) goto noinsert2;
1811           bap = ap +  bs2*i;
1812           if (roworiented) {
1813             if (is == ADD_VALUES) {
1814               for (ii=0; ii<bs; ii++,value+=stepval) {
1815                 for (jj=ii; jj<bs2; jj+=bs) {
1816                   bap[jj] += *value++;
1817                 }
1818               }
1819             } else {
1820               for (ii=0; ii<bs; ii++,value+=stepval) {
1821                 for (jj=ii; jj<bs2; jj+=bs) {
1822                   bap[jj] = *value++;
1823                 }
1824               }
1825             }
1826           } else {
1827             if (is == ADD_VALUES) {
1828               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1829                 for (jj=0; jj<bs; jj++) {
1830                   bap[jj] += value[jj];
1831                 }
1832                 bap += bs;
1833               }
1834             } else {
1835               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1836                 for (jj=0; jj<bs; jj++) {
1837                   bap[jj]  = value[jj];
1838                 }
1839                 bap += bs;
1840               }
1841             }
1842           }
1843           goto noinsert2;
1844         }
1845       }
1846       if (nonew == 1) goto noinsert2;
1847       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1848       if (A->structure_only) {
1849         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1850       } else {
1851         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1852       }
1853       N = nrow++ - 1; high++;
1854       /* shift up all the later entries in this row */
1855       for (ii=N; ii>=i; ii--) {
1856         rp[ii+1] = rp[ii];
1857         if (!A->structure_only) {
1858           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1859         }
1860       }
1861       if (N >= i && !A->structure_only) {
1862         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1863       }
1864 
1865       rp[i] = col;
1866       if (!A->structure_only) {
1867         bap   = ap +  bs2*i;
1868         if (roworiented) {
1869           for (ii=0; ii<bs; ii++,value+=stepval) {
1870             for (jj=ii; jj<bs2; jj+=bs) {
1871               bap[jj] = *value++;
1872             }
1873           }
1874         } else {
1875           for (ii=0; ii<bs; ii++,value+=stepval) {
1876             for (jj=0; jj<bs; jj++) {
1877               *bap++ = *value++;
1878             }
1879           }
1880         }
1881       }
1882 noinsert2:;
1883       low = i;
1884     }
1885     ailen[row] = nrow;
1886   }
1887   PetscFunctionReturn(0);
1888 }
1889 
1890 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1891 {
1892   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1893   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1894   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1895   PetscErrorCode ierr;
1896   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1897   MatScalar      *aa  = a->a,*ap;
1898   PetscReal      ratio=0.6;
1899 
1900   PetscFunctionBegin;
1901   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1902 
1903   if (m) rmax = ailen[0];
1904   for (i=1; i<mbs; i++) {
1905     /* move each row back by the amount of empty slots (fshift) before it*/
1906     fshift += imax[i-1] - ailen[i-1];
1907     rmax    = PetscMax(rmax,ailen[i]);
1908     if (fshift) {
1909       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1910       N  = ailen[i];
1911       for (j=0; j<N; j++) {
1912         ip[j-fshift] = ip[j];
1913         if (!A->structure_only) {
1914           ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1915         }
1916       }
1917     }
1918     ai[i] = ai[i-1] + ailen[i-1];
1919   }
1920   if (mbs) {
1921     fshift += imax[mbs-1] - ailen[mbs-1];
1922     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1923   }
1924 
1925   /* reset ilen and imax for each row */
1926   a->nonzerorowcnt = 0;
1927   if (A->structure_only) {
1928     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1929   } else { /* !A->structure_only */
1930     for (i=0; i<mbs; i++) {
1931       ailen[i] = imax[i] = ai[i+1] - ai[i];
1932       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1933     }
1934   }
1935   a->nz = ai[mbs];
1936 
1937   /* diagonals may have moved, so kill the diagonal pointers */
1938   a->idiagvalid = PETSC_FALSE;
1939   if (fshift && a->diag) {
1940     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1941     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1942     a->diag = 0;
1943   }
1944   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1945   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1946   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1947   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1948 
1949   A->info.mallocs    += a->reallocs;
1950   a->reallocs         = 0;
1951   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1952   a->rmax             = rmax;
1953 
1954   if (!A->structure_only) {
1955     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1956   }
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 /*
1961    This function returns an array of flags which indicate the locations of contiguous
1962    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1963    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1964    Assume: sizes should be long enough to hold all the values.
1965 */
1966 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1967 {
1968   PetscInt  i,j,k,row;
1969   PetscBool flg;
1970 
1971   PetscFunctionBegin;
1972   for (i=0,j=0; i<n; j++) {
1973     row = idx[i];
1974     if (row%bs!=0) { /* Not the begining of a block */
1975       sizes[j] = 1;
1976       i++;
1977     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1978       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1979       i++;
1980     } else { /* Begining of the block, so check if the complete block exists */
1981       flg = PETSC_TRUE;
1982       for (k=1; k<bs; k++) {
1983         if (row+k != idx[i+k]) { /* break in the block */
1984           flg = PETSC_FALSE;
1985           break;
1986         }
1987       }
1988       if (flg) { /* No break in the bs */
1989         sizes[j] = bs;
1990         i       += bs;
1991       } else {
1992         sizes[j] = 1;
1993         i++;
1994       }
1995     }
1996   }
1997   *bs_max = j;
1998   PetscFunctionReturn(0);
1999 }
2000 
2001 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2002 {
2003   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2004   PetscErrorCode    ierr;
2005   PetscInt          i,j,k,count,*rows;
2006   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2007   PetscScalar       zero = 0.0;
2008   MatScalar         *aa;
2009   const PetscScalar *xx;
2010   PetscScalar       *bb;
2011 
2012   PetscFunctionBegin;
2013   /* fix right hand side if needed */
2014   if (x && b) {
2015     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2016     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2017     for (i=0; i<is_n; i++) {
2018       bb[is_idx[i]] = diag*xx[is_idx[i]];
2019     }
2020     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2021     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2022   }
2023 
2024   /* Make a copy of the IS and  sort it */
2025   /* allocate memory for rows,sizes */
2026   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2027 
2028   /* copy IS values to rows, and sort them */
2029   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2030   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2031 
2032   if (baij->keepnonzeropattern) {
2033     for (i=0; i<is_n; i++) sizes[i] = 1;
2034     bs_max          = is_n;
2035   } else {
2036     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2037     A->nonzerostate++;
2038   }
2039 
2040   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2041     row = rows[j];
2042     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2043     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2044     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2045     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2046       if (diag != (PetscScalar)0.0) {
2047         if (baij->ilen[row/bs] > 0) {
2048           baij->ilen[row/bs]       = 1;
2049           baij->j[baij->i[row/bs]] = row/bs;
2050 
2051           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2052         }
2053         /* Now insert all the diagonal values for this bs */
2054         for (k=0; k<bs; k++) {
2055           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2056         }
2057       } else { /* (diag == 0.0) */
2058         baij->ilen[row/bs] = 0;
2059       } /* end (diag == 0.0) */
2060     } else { /* (sizes[i] != bs) */
2061 #if defined(PETSC_USE_DEBUG)
2062       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2063 #endif
2064       for (k=0; k<count; k++) {
2065         aa[0] =  zero;
2066         aa   += bs;
2067       }
2068       if (diag != (PetscScalar)0.0) {
2069         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2070       }
2071     }
2072   }
2073 
2074   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2075   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2076   PetscFunctionReturn(0);
2077 }
2078 
2079 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2080 {
2081   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2082   PetscErrorCode    ierr;
2083   PetscInt          i,j,k,count;
2084   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2085   PetscScalar       zero = 0.0;
2086   MatScalar         *aa;
2087   const PetscScalar *xx;
2088   PetscScalar       *bb;
2089   PetscBool         *zeroed,vecs = PETSC_FALSE;
2090 
2091   PetscFunctionBegin;
2092   /* fix right hand side if needed */
2093   if (x && b) {
2094     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2095     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2096     vecs = PETSC_TRUE;
2097   }
2098 
2099   /* zero the columns */
2100   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2101   for (i=0; i<is_n; i++) {
2102     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2103     zeroed[is_idx[i]] = PETSC_TRUE;
2104   }
2105   for (i=0; i<A->rmap->N; i++) {
2106     if (!zeroed[i]) {
2107       row = i/bs;
2108       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2109         for (k=0; k<bs; k++) {
2110           col = bs*baij->j[j] + k;
2111           if (zeroed[col]) {
2112             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2113             if (vecs) bb[i] -= aa[0]*xx[col];
2114             aa[0] = 0.0;
2115           }
2116         }
2117       }
2118     } else if (vecs) bb[i] = diag*xx[i];
2119   }
2120   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2121   if (vecs) {
2122     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2123     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2124   }
2125 
2126   /* zero the rows */
2127   for (i=0; i<is_n; i++) {
2128     row   = is_idx[i];
2129     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2130     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2131     for (k=0; k<count; k++) {
2132       aa[0] =  zero;
2133       aa   += bs;
2134     }
2135     if (diag != (PetscScalar)0.0) {
2136       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2137     }
2138   }
2139   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2140   PetscFunctionReturn(0);
2141 }
2142 
2143 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2144 {
2145   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2146   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2147   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2148   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2149   PetscErrorCode ierr;
2150   PetscInt       ridx,cidx,bs2=a->bs2;
2151   PetscBool      roworiented=a->roworiented;
2152   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2153 
2154   PetscFunctionBegin;
2155   for (k=0; k<m; k++) { /* loop over added rows */
2156     row  = im[k];
2157     brow = row/bs;
2158     if (row < 0) continue;
2159 #if defined(PETSC_USE_DEBUG)
2160     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2161 #endif
2162     rp   = aj + ai[brow];
2163     if (!A->structure_only) ap = aa + bs2*ai[brow];
2164     rmax = imax[brow];
2165     nrow = ailen[brow];
2166     low  = 0;
2167     high = nrow;
2168     for (l=0; l<n; l++) { /* loop over added columns */
2169       if (in[l] < 0) continue;
2170 #if defined(PETSC_USE_DEBUG)
2171       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2172 #endif
2173       col  = in[l]; bcol = col/bs;
2174       ridx = row % bs; cidx = col % bs;
2175       if (!A->structure_only) {
2176         if (roworiented) {
2177           value = v[l + k*n];
2178         } else {
2179           value = v[k + l*m];
2180         }
2181       }
2182       if (col <= lastcol) low = 0; else high = nrow;
2183       lastcol = col;
2184       while (high-low > 7) {
2185         t = (low+high)/2;
2186         if (rp[t] > bcol) high = t;
2187         else              low  = t;
2188       }
2189       for (i=low; i<high; i++) {
2190         if (rp[i] > bcol) break;
2191         if (rp[i] == bcol) {
2192           bap = ap +  bs2*i + bs*cidx + ridx;
2193           if (!A->structure_only) {
2194             if (is == ADD_VALUES) *bap += value;
2195             else                  *bap  = value;
2196           }
2197           goto noinsert1;
2198         }
2199       }
2200       if (nonew == 1) goto noinsert1;
2201       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2202       if (A->structure_only) {
2203         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2204       } else {
2205         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2206       }
2207       N = nrow++ - 1; high++;
2208       /* shift up all the later entries in this row */
2209       for (ii=N; ii>=i; ii--) {
2210         rp[ii+1] = rp[ii];
2211         if (!A->structure_only) {
2212           ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2213         }
2214       }
2215       if (N>=i && !A->structure_only) {
2216         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2217       }
2218       rp[i]                      = bcol;
2219       if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value;
2220       a->nz++;
2221       A->nonzerostate++;
2222 noinsert1:;
2223       low = i;
2224     }
2225     ailen[brow] = nrow;
2226   }
2227   PetscFunctionReturn(0);
2228 }
2229 
2230 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2231 {
2232   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2233   Mat            outA;
2234   PetscErrorCode ierr;
2235   PetscBool      row_identity,col_identity;
2236 
2237   PetscFunctionBegin;
2238   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2239   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2240   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2241   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2242 
2243   outA            = inA;
2244   inA->factortype = MAT_FACTOR_LU;
2245   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2246   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2247 
2248   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2249 
2250   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2251   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2252   a->row = row;
2253   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2254   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2255   a->col = col;
2256 
2257   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2258   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2259   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2260   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2261 
2262   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2263   if (!a->solve_work) {
2264     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2265     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2266   }
2267   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2272 {
2273   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2274   PetscInt    i,nz,mbs;
2275 
2276   PetscFunctionBegin;
2277   nz  = baij->maxnz;
2278   mbs = baij->mbs;
2279   for (i=0; i<nz; i++) {
2280     baij->j[i] = indices[i];
2281   }
2282   baij->nz = nz;
2283   for (i=0; i<mbs; i++) {
2284     baij->ilen[i] = baij->imax[i];
2285   }
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 /*@
2290     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2291        in the matrix.
2292 
2293   Input Parameters:
2294 +  mat - the SeqBAIJ matrix
2295 -  indices - the column indices
2296 
2297   Level: advanced
2298 
2299   Notes:
2300     This can be called if you have precomputed the nonzero structure of the
2301   matrix and want to provide it to the matrix object to improve the performance
2302   of the MatSetValues() operation.
2303 
2304     You MUST have set the correct numbers of nonzeros per row in the call to
2305   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2306 
2307     MUST be called before any calls to MatSetValues();
2308 
2309 @*/
2310 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2311 {
2312   PetscErrorCode ierr;
2313 
2314   PetscFunctionBegin;
2315   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2316   PetscValidPointer(indices,2);
2317   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2322 {
2323   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2324   PetscErrorCode ierr;
2325   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2326   PetscReal      atmp;
2327   PetscScalar    *x,zero = 0.0;
2328   MatScalar      *aa;
2329   PetscInt       ncols,brow,krow,kcol;
2330 
2331   PetscFunctionBegin;
2332   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2333   bs  = A->rmap->bs;
2334   aa  = a->a;
2335   ai  = a->i;
2336   aj  = a->j;
2337   mbs = a->mbs;
2338 
2339   ierr = VecSet(v,zero);CHKERRQ(ierr);
2340   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2341   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2342   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2343   for (i=0; i<mbs; i++) {
2344     ncols = ai[1] - ai[0]; ai++;
2345     brow  = bs*i;
2346     for (j=0; j<ncols; j++) {
2347       for (kcol=0; kcol<bs; kcol++) {
2348         for (krow=0; krow<bs; krow++) {
2349           atmp = PetscAbsScalar(*aa);aa++;
2350           row  = brow + krow;   /* row index */
2351           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2352         }
2353       }
2354       aj++;
2355     }
2356   }
2357   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2358   PetscFunctionReturn(0);
2359 }
2360 
2361 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2362 {
2363   PetscErrorCode ierr;
2364 
2365   PetscFunctionBegin;
2366   /* If the two matrices have the same copy implementation, use fast copy. */
2367   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2368     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2369     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2370     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2371 
2372     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2373     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2374     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2375     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2376   } else {
2377     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2378   }
2379   PetscFunctionReturn(0);
2380 }
2381 
2382 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2383 {
2384   PetscErrorCode ierr;
2385 
2386   PetscFunctionBegin;
2387   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2392 {
2393   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2394 
2395   PetscFunctionBegin;
2396   *array = a->a;
2397   PetscFunctionReturn(0);
2398 }
2399 
2400 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2401 {
2402   PetscFunctionBegin;
2403   PetscFunctionReturn(0);
2404 }
2405 
2406 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2407 {
2408   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2409   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2410   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2411   PetscErrorCode ierr;
2412 
2413   PetscFunctionBegin;
2414   /* Set the number of nonzeros in the new matrix */
2415   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2416   PetscFunctionReturn(0);
2417 }
2418 
2419 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2420 {
2421   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2422   PetscErrorCode ierr;
2423   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2424   PetscBLASInt   one=1;
2425 
2426   PetscFunctionBegin;
2427   if (str == SAME_NONZERO_PATTERN) {
2428     PetscScalar  alpha = a;
2429     PetscBLASInt bnz;
2430     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2431     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2432     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2433   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2434     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2435   } else {
2436     Mat      B;
2437     PetscInt *nnz;
2438     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2439     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2440     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2441     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2442     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2443     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2444     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2445     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2446     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2447     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2448     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2449     ierr = PetscFree(nnz);CHKERRQ(ierr);
2450   }
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2455 {
2456   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2457   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2458   MatScalar   *aa = a->a;
2459 
2460   PetscFunctionBegin;
2461   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2466 {
2467   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2468   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2469   MatScalar   *aa = a->a;
2470 
2471   PetscFunctionBegin;
2472   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2473   PetscFunctionReturn(0);
2474 }
2475 
2476 /*
2477     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2478 */
2479 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2480 {
2481   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2482   PetscErrorCode ierr;
2483   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2484   PetscInt       nz = a->i[m],row,*jj,mr,col;
2485 
2486   PetscFunctionBegin;
2487   *nn = n;
2488   if (!ia) PetscFunctionReturn(0);
2489   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2490   else {
2491     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2492     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2493     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2494     jj   = a->j;
2495     for (i=0; i<nz; i++) {
2496       collengths[jj[i]]++;
2497     }
2498     cia[0] = oshift;
2499     for (i=0; i<n; i++) {
2500       cia[i+1] = cia[i] + collengths[i];
2501     }
2502     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2503     jj   = a->j;
2504     for (row=0; row<m; row++) {
2505       mr = a->i[row+1] - a->i[row];
2506       for (i=0; i<mr; i++) {
2507         col = *jj++;
2508 
2509         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2510       }
2511     }
2512     ierr = PetscFree(collengths);CHKERRQ(ierr);
2513     *ia  = cia; *ja = cja;
2514   }
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2519 {
2520   PetscErrorCode ierr;
2521 
2522   PetscFunctionBegin;
2523   if (!ia) PetscFunctionReturn(0);
2524   ierr = PetscFree(*ia);CHKERRQ(ierr);
2525   ierr = PetscFree(*ja);CHKERRQ(ierr);
2526   PetscFunctionReturn(0);
2527 }
2528 
2529 /*
2530  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2531  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2532  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2533  */
2534 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2535 {
2536   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2537   PetscErrorCode ierr;
2538   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2539   PetscInt       nz = a->i[m],row,*jj,mr,col;
2540   PetscInt       *cspidx;
2541 
2542   PetscFunctionBegin;
2543   *nn = n;
2544   if (!ia) PetscFunctionReturn(0);
2545 
2546   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2547   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2548   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2549   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2550   jj   = a->j;
2551   for (i=0; i<nz; i++) {
2552     collengths[jj[i]]++;
2553   }
2554   cia[0] = oshift;
2555   for (i=0; i<n; i++) {
2556     cia[i+1] = cia[i] + collengths[i];
2557   }
2558   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2559   jj   = a->j;
2560   for (row=0; row<m; row++) {
2561     mr = a->i[row+1] - a->i[row];
2562     for (i=0; i<mr; i++) {
2563       col = *jj++;
2564       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2565       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2566     }
2567   }
2568   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2569   *ia    = cia; *ja = cja;
2570   *spidx = cspidx;
2571   PetscFunctionReturn(0);
2572 }
2573 
2574 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2575 {
2576   PetscErrorCode ierr;
2577 
2578   PetscFunctionBegin;
2579   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2580   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2585 {
2586   PetscErrorCode ierr;
2587   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2588 
2589   PetscFunctionBegin;
2590   if (!Y->preallocated || !aij->nz) {
2591     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2592   }
2593   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2594   PetscFunctionReturn(0);
2595 }
2596 
2597 /* -------------------------------------------------------------------*/
2598 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2599                                        MatGetRow_SeqBAIJ,
2600                                        MatRestoreRow_SeqBAIJ,
2601                                        MatMult_SeqBAIJ_N,
2602                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2603                                        MatMultTranspose_SeqBAIJ,
2604                                        MatMultTransposeAdd_SeqBAIJ,
2605                                        0,
2606                                        0,
2607                                        0,
2608                                /* 10*/ 0,
2609                                        MatLUFactor_SeqBAIJ,
2610                                        0,
2611                                        0,
2612                                        MatTranspose_SeqBAIJ,
2613                                /* 15*/ MatGetInfo_SeqBAIJ,
2614                                        MatEqual_SeqBAIJ,
2615                                        MatGetDiagonal_SeqBAIJ,
2616                                        MatDiagonalScale_SeqBAIJ,
2617                                        MatNorm_SeqBAIJ,
2618                                /* 20*/ 0,
2619                                        MatAssemblyEnd_SeqBAIJ,
2620                                        MatSetOption_SeqBAIJ,
2621                                        MatZeroEntries_SeqBAIJ,
2622                                /* 24*/ MatZeroRows_SeqBAIJ,
2623                                        0,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                /* 29*/ MatSetUp_SeqBAIJ,
2628                                        0,
2629                                        0,
2630                                        0,
2631                                        0,
2632                                /* 34*/ MatDuplicate_SeqBAIJ,
2633                                        0,
2634                                        0,
2635                                        MatILUFactor_SeqBAIJ,
2636                                        0,
2637                                /* 39*/ MatAXPY_SeqBAIJ,
2638                                        MatCreateSubMatrices_SeqBAIJ,
2639                                        MatIncreaseOverlap_SeqBAIJ,
2640                                        MatGetValues_SeqBAIJ,
2641                                        MatCopy_SeqBAIJ,
2642                                /* 44*/ 0,
2643                                        MatScale_SeqBAIJ,
2644                                        MatShift_SeqBAIJ,
2645                                        0,
2646                                        MatZeroRowsColumns_SeqBAIJ,
2647                                /* 49*/ 0,
2648                                        MatGetRowIJ_SeqBAIJ,
2649                                        MatRestoreRowIJ_SeqBAIJ,
2650                                        MatGetColumnIJ_SeqBAIJ,
2651                                        MatRestoreColumnIJ_SeqBAIJ,
2652                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2653                                        0,
2654                                        0,
2655                                        0,
2656                                        MatSetValuesBlocked_SeqBAIJ,
2657                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2658                                        MatDestroy_SeqBAIJ,
2659                                        MatView_SeqBAIJ,
2660                                        0,
2661                                        0,
2662                                /* 64*/ 0,
2663                                        0,
2664                                        0,
2665                                        0,
2666                                        0,
2667                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2668                                        0,
2669                                        MatConvert_Basic,
2670                                        0,
2671                                        0,
2672                                /* 74*/ 0,
2673                                        MatFDColoringApply_BAIJ,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                /* 79*/ 0,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                        MatLoad_SeqBAIJ,
2682                                /* 84*/ 0,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                /* 89*/ 0,
2688                                        0,
2689                                        0,
2690                                        0,
2691                                        0,
2692                                /* 94*/ 0,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                /* 99*/ 0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                /*104*/ 0,
2703                                        MatRealPart_SeqBAIJ,
2704                                        MatImaginaryPart_SeqBAIJ,
2705                                        0,
2706                                        0,
2707                                /*109*/ 0,
2708                                        0,
2709                                        0,
2710                                        0,
2711                                        MatMissingDiagonal_SeqBAIJ,
2712                                /*114*/ 0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                        0,
2717                                /*119*/ 0,
2718                                        0,
2719                                        MatMultHermitianTranspose_SeqBAIJ,
2720                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2721                                        0,
2722                                /*124*/ 0,
2723                                        0,
2724                                        MatInvertBlockDiagonal_SeqBAIJ,
2725                                        0,
2726                                        0,
2727                                /*129*/ 0,
2728                                        0,
2729                                        0,
2730                                        0,
2731                                        0,
2732                                /*134*/ 0,
2733                                        0,
2734                                        0,
2735                                        0,
2736                                        0,
2737                                /*139*/ MatSetBlockSizes_Default,
2738                                        0,
2739                                        0,
2740                                        MatFDColoringSetUp_SeqXAIJ,
2741                                        0,
2742                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2743                                        MatDestroySubMatrices_SeqBAIJ
2744 };
2745 
2746 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2747 {
2748   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2749   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2750   PetscErrorCode ierr;
2751 
2752   PetscFunctionBegin;
2753   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2754 
2755   /* allocate space for values if not already there */
2756   if (!aij->saved_values) {
2757     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2758     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2759   }
2760 
2761   /* copy values over */
2762   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2763   PetscFunctionReturn(0);
2764 }
2765 
2766 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2767 {
2768   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2769   PetscErrorCode ierr;
2770   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2771 
2772   PetscFunctionBegin;
2773   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2774   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2775 
2776   /* copy values over */
2777   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2778   PetscFunctionReturn(0);
2779 }
2780 
2781 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2782 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2783 
2784 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2785 {
2786   Mat_SeqBAIJ    *b;
2787   PetscErrorCode ierr;
2788   PetscInt       i,mbs,nbs,bs2;
2789   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2790 
2791   PetscFunctionBegin;
2792   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2793   if (nz == MAT_SKIP_ALLOCATION) {
2794     skipallocation = PETSC_TRUE;
2795     nz             = 0;
2796   }
2797 
2798   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2799   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2800   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2801   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2802 
2803   B->preallocated = PETSC_TRUE;
2804 
2805   mbs = B->rmap->n/bs;
2806   nbs = B->cmap->n/bs;
2807   bs2 = bs*bs;
2808 
2809   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2810 
2811   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2812   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2813   if (nnz) {
2814     for (i=0; i<mbs; i++) {
2815       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2816       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2817     }
2818   }
2819 
2820   b    = (Mat_SeqBAIJ*)B->data;
2821   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2822   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2823   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2824 
2825   if (!flg) {
2826     switch (bs) {
2827     case 1:
2828       B->ops->mult    = MatMult_SeqBAIJ_1;
2829       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2830       break;
2831     case 2:
2832       B->ops->mult    = MatMult_SeqBAIJ_2;
2833       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2834       break;
2835     case 3:
2836       B->ops->mult    = MatMult_SeqBAIJ_3;
2837       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2838       break;
2839     case 4:
2840       B->ops->mult    = MatMult_SeqBAIJ_4;
2841       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2842       break;
2843     case 5:
2844       B->ops->mult    = MatMult_SeqBAIJ_5;
2845       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2846       break;
2847     case 6:
2848       B->ops->mult    = MatMult_SeqBAIJ_6;
2849       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2850       break;
2851     case 7:
2852       B->ops->mult    = MatMult_SeqBAIJ_7;
2853       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2854       break;
2855     case 9:
2856 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2857       B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2858       B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2859 #else
2860       B->ops->mult    = MatMult_SeqBAIJ_N;
2861       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2862 #endif
2863       break;
2864     case 11:
2865       B->ops->mult    = MatMult_SeqBAIJ_11;
2866       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2867       break;
2868     case 15:
2869       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2870       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2871       break;
2872     default:
2873       B->ops->mult    = MatMult_SeqBAIJ_N;
2874       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2875       break;
2876     }
2877   }
2878   B->ops->sor = MatSOR_SeqBAIJ;
2879   b->mbs = mbs;
2880   b->nbs = nbs;
2881   if (!skipallocation) {
2882     if (!b->imax) {
2883       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2884       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2885 
2886       b->free_imax_ilen = PETSC_TRUE;
2887     }
2888     /* b->ilen will count nonzeros in each block row so far. */
2889     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2890     if (!nnz) {
2891       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2892       else if (nz < 0) nz = 1;
2893       for (i=0; i<mbs; i++) b->imax[i] = nz;
2894       nz = nz*mbs;
2895     } else {
2896       nz = 0;
2897       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2898     }
2899 
2900     /* allocate the matrix space */
2901     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2902     if (B->structure_only) {
2903       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2904       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2905       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2906     } else {
2907       ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2908       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2909       ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2910     }
2911     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2912 
2913     if (B->structure_only) {
2914       b->singlemalloc = PETSC_FALSE;
2915       b->free_a       = PETSC_FALSE;
2916     } else {
2917       b->singlemalloc = PETSC_TRUE;
2918       b->free_a       = PETSC_TRUE;
2919     }
2920     b->free_ij = PETSC_TRUE;
2921 
2922     b->i[0] = 0;
2923     for (i=1; i<mbs+1; i++) {
2924       b->i[i] = b->i[i-1] + b->imax[i-1];
2925     }
2926 
2927   } else {
2928     b->free_a  = PETSC_FALSE;
2929     b->free_ij = PETSC_FALSE;
2930   }
2931 
2932   b->bs2              = bs2;
2933   b->mbs              = mbs;
2934   b->nz               = 0;
2935   b->maxnz            = nz;
2936   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2937   B->was_assembled    = PETSC_FALSE;
2938   B->assembled        = PETSC_FALSE;
2939   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2940   PetscFunctionReturn(0);
2941 }
2942 
2943 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2944 {
2945   PetscInt       i,m,nz,nz_max=0,*nnz;
2946   PetscScalar    *values=0;
2947   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2948   PetscErrorCode ierr;
2949 
2950   PetscFunctionBegin;
2951   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2952   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2953   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2954   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2955   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2956   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2957   m    = B->rmap->n/bs;
2958 
2959   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2960   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2961   for (i=0; i<m; i++) {
2962     nz = ii[i+1]- ii[i];
2963     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2964     nz_max = PetscMax(nz_max, nz);
2965     nnz[i] = nz;
2966   }
2967   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2968   ierr = PetscFree(nnz);CHKERRQ(ierr);
2969 
2970   values = (PetscScalar*)V;
2971   if (!values) {
2972     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2973   }
2974   for (i=0; i<m; i++) {
2975     PetscInt          ncols  = ii[i+1] - ii[i];
2976     const PetscInt    *icols = jj + ii[i];
2977     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2978     if (!roworiented) {
2979       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2980     } else {
2981       PetscInt j;
2982       for (j=0; j<ncols; j++) {
2983         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2984         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2985       }
2986     }
2987   }
2988   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2989   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2990   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2991   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2992   PetscFunctionReturn(0);
2993 }
2994 
2995 /*MC
2996    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2997    block sparse compressed row format.
2998 
2999    Options Database Keys:
3000 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3001 
3002   Level: beginner
3003 
3004 .seealso: MatCreateSeqBAIJ()
3005 M*/
3006 
3007 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3008 
3009 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3010 {
3011   PetscErrorCode ierr;
3012   PetscMPIInt    size;
3013   Mat_SeqBAIJ    *b;
3014 
3015   PetscFunctionBegin;
3016   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3017   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3018 
3019   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3020   B->data = (void*)b;
3021   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3022 
3023   b->row          = 0;
3024   b->col          = 0;
3025   b->icol         = 0;
3026   b->reallocs     = 0;
3027   b->saved_values = 0;
3028 
3029   b->roworiented        = PETSC_TRUE;
3030   b->nonew              = 0;
3031   b->diag               = 0;
3032   B->spptr              = 0;
3033   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3034   b->keepnonzeropattern = PETSC_FALSE;
3035 
3036   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3037   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3038   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3039   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3045 #if defined(PETSC_HAVE_HYPRE)
3046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3047 #endif
3048   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3049   PetscFunctionReturn(0);
3050 }
3051 
3052 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3053 {
3054   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3055   PetscErrorCode ierr;
3056   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3057 
3058   PetscFunctionBegin;
3059   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3060 
3061   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3062     c->imax           = a->imax;
3063     c->ilen           = a->ilen;
3064     c->free_imax_ilen = PETSC_FALSE;
3065   } else {
3066     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3067     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3068     for (i=0; i<mbs; i++) {
3069       c->imax[i] = a->imax[i];
3070       c->ilen[i] = a->ilen[i];
3071     }
3072     c->free_imax_ilen = PETSC_TRUE;
3073   }
3074 
3075   /* allocate the matrix space */
3076   if (mallocmatspace) {
3077     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3078       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3079       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3080 
3081       c->i            = a->i;
3082       c->j            = a->j;
3083       c->singlemalloc = PETSC_FALSE;
3084       c->free_a       = PETSC_TRUE;
3085       c->free_ij      = PETSC_FALSE;
3086       c->parent       = A;
3087       C->preallocated = PETSC_TRUE;
3088       C->assembled    = PETSC_TRUE;
3089 
3090       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3091       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3092       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3093     } else {
3094       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3095       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3096 
3097       c->singlemalloc = PETSC_TRUE;
3098       c->free_a       = PETSC_TRUE;
3099       c->free_ij      = PETSC_TRUE;
3100 
3101       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3102       if (mbs > 0) {
3103         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3104         if (cpvalues == MAT_COPY_VALUES) {
3105           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3106         } else {
3107           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3108         }
3109       }
3110       C->preallocated = PETSC_TRUE;
3111       C->assembled    = PETSC_TRUE;
3112     }
3113   }
3114 
3115   c->roworiented = a->roworiented;
3116   c->nonew       = a->nonew;
3117 
3118   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3119   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3120 
3121   c->bs2         = a->bs2;
3122   c->mbs         = a->mbs;
3123   c->nbs         = a->nbs;
3124 
3125   if (a->diag) {
3126     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3127       c->diag      = a->diag;
3128       c->free_diag = PETSC_FALSE;
3129     } else {
3130       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3131       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3132       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3133       c->free_diag = PETSC_TRUE;
3134     }
3135   } else c->diag = 0;
3136 
3137   c->nz         = a->nz;
3138   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3139   c->solve_work = NULL;
3140   c->mult_work  = NULL;
3141   c->sor_workt  = NULL;
3142   c->sor_work   = NULL;
3143 
3144   c->compressedrow.use   = a->compressedrow.use;
3145   c->compressedrow.nrows = a->compressedrow.nrows;
3146   if (a->compressedrow.use) {
3147     i    = a->compressedrow.nrows;
3148     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3149     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3150     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3151     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3152   } else {
3153     c->compressedrow.use    = PETSC_FALSE;
3154     c->compressedrow.i      = NULL;
3155     c->compressedrow.rindex = NULL;
3156   }
3157   C->nonzerostate = A->nonzerostate;
3158 
3159   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3160   PetscFunctionReturn(0);
3161 }
3162 
3163 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3164 {
3165   PetscErrorCode ierr;
3166 
3167   PetscFunctionBegin;
3168   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3169   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3170   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3171   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3172   PetscFunctionReturn(0);
3173 }
3174 
3175 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3176 {
3177   Mat_SeqBAIJ    *a;
3178   PetscErrorCode ierr;
3179   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3180   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3181   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3182   PetscInt       *masked,nmask,tmp,bs2,ishift;
3183   PetscMPIInt    size;
3184   int            fd;
3185   PetscScalar    *aa;
3186   MPI_Comm       comm;
3187 
3188   PetscFunctionBegin;
3189   /* force binary viewer to load .info file if it has not yet done so */
3190   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3191   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3192   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3193   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3194   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3195   if (bs < 0) bs = 1;
3196   bs2  = bs*bs;
3197 
3198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3199   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3200   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3201   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3202   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3203   M = header[1]; N = header[2]; nz = header[3];
3204 
3205   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3206   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3207 
3208   /*
3209      This code adds extra rows to make sure the number of rows is
3210     divisible by the blocksize
3211   */
3212   mbs        = M/bs;
3213   extra_rows = bs - M + bs*(mbs);
3214   if (extra_rows == bs) extra_rows = 0;
3215   else mbs++;
3216   if (extra_rows) {
3217     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3218   }
3219 
3220   /* Set global sizes if not already set */
3221   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3222     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3223   } else { /* Check if the matrix global sizes are correct */
3224     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3225     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3226       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3227     }
3228     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3229   }
3230 
3231   /* read in row lengths */
3232   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3233   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3234   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3235 
3236   /* read in column indices */
3237   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3238   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3239   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3240 
3241   /* loop over row lengths determining block row lengths */
3242   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3243   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3244   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3245   rowcount = 0;
3246   nzcount  = 0;
3247   for (i=0; i<mbs; i++) {
3248     nmask = 0;
3249     for (j=0; j<bs; j++) {
3250       kmax = rowlengths[rowcount];
3251       for (k=0; k<kmax; k++) {
3252         tmp = jj[nzcount++]/bs;
3253         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3254       }
3255       rowcount++;
3256     }
3257     browlengths[i] += nmask;
3258     /* zero out the mask elements we set */
3259     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3260   }
3261 
3262   /* Do preallocation  */
3263   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3264   a    = (Mat_SeqBAIJ*)newmat->data;
3265 
3266   /* set matrix "i" values */
3267   a->i[0] = 0;
3268   for (i=1; i<= mbs; i++) {
3269     a->i[i]      = a->i[i-1] + browlengths[i-1];
3270     a->ilen[i-1] = browlengths[i-1];
3271   }
3272   a->nz = 0;
3273   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3274 
3275   /* read in nonzero values */
3276   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3277   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3278   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3279 
3280   /* set "a" and "j" values into matrix */
3281   nzcount = 0; jcount = 0;
3282   for (i=0; i<mbs; i++) {
3283     nzcountb = nzcount;
3284     nmask    = 0;
3285     for (j=0; j<bs; j++) {
3286       kmax = rowlengths[i*bs+j];
3287       for (k=0; k<kmax; k++) {
3288         tmp = jj[nzcount++]/bs;
3289         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3290       }
3291     }
3292     /* sort the masked values */
3293     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3294 
3295     /* set "j" values into matrix */
3296     maskcount = 1;
3297     for (j=0; j<nmask; j++) {
3298       a->j[jcount++]  = masked[j];
3299       mask[masked[j]] = maskcount++;
3300     }
3301     /* set "a" values into matrix */
3302     ishift = bs2*a->i[i];
3303     for (j=0; j<bs; j++) {
3304       kmax = rowlengths[i*bs+j];
3305       for (k=0; k<kmax; k++) {
3306         tmp       = jj[nzcountb]/bs;
3307         block     = mask[tmp] - 1;
3308         point     = jj[nzcountb] - bs*tmp;
3309         idx       = ishift + bs2*block + j + bs*point;
3310         a->a[idx] = (MatScalar)aa[nzcountb++];
3311       }
3312     }
3313     /* zero out the mask elements we set */
3314     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3315   }
3316   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3317 
3318   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3319   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3320   ierr = PetscFree(aa);CHKERRQ(ierr);
3321   ierr = PetscFree(jj);CHKERRQ(ierr);
3322   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3323 
3324   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3325   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3326   PetscFunctionReturn(0);
3327 }
3328 
3329 /*@C
3330    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3331    compressed row) format.  For good matrix assembly performance the
3332    user should preallocate the matrix storage by setting the parameter nz
3333    (or the array nnz).  By setting these parameters accurately, performance
3334    during matrix assembly can be increased by more than a factor of 50.
3335 
3336    Collective on MPI_Comm
3337 
3338    Input Parameters:
3339 +  comm - MPI communicator, set to PETSC_COMM_SELF
3340 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3341           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3342 .  m - number of rows
3343 .  n - number of columns
3344 .  nz - number of nonzero blocks  per block row (same for all rows)
3345 -  nnz - array containing the number of nonzero blocks in the various block rows
3346          (possibly different for each block row) or NULL
3347 
3348    Output Parameter:
3349 .  A - the matrix
3350 
3351    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3352    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3353    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3354 
3355    Options Database Keys:
3356 .   -mat_no_unroll - uses code that does not unroll the loops in the
3357                      block calculations (much slower)
3358 .    -mat_block_size - size of the blocks to use
3359 
3360    Level: intermediate
3361 
3362    Notes:
3363    The number of rows and columns must be divisible by blocksize.
3364 
3365    If the nnz parameter is given then the nz parameter is ignored
3366 
3367    A nonzero block is any block that as 1 or more nonzeros in it
3368 
3369    The block AIJ format is fully compatible with standard Fortran 77
3370    storage.  That is, the stored row and column indices can begin at
3371    either one (as in Fortran) or zero.  See the users' manual for details.
3372 
3373    Specify the preallocated storage with either nz or nnz (not both).
3374    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3375    allocation.  See Users-Manual: ch_mat for details.
3376    matrices.
3377 
3378 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3379 @*/
3380 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3381 {
3382   PetscErrorCode ierr;
3383 
3384   PetscFunctionBegin;
3385   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3386   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3387   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3388   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3389   PetscFunctionReturn(0);
3390 }
3391 
3392 /*@C
3393    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3394    per row in the matrix. For good matrix assembly performance the
3395    user should preallocate the matrix storage by setting the parameter nz
3396    (or the array nnz).  By setting these parameters accurately, performance
3397    during matrix assembly can be increased by more than a factor of 50.
3398 
3399    Collective on MPI_Comm
3400 
3401    Input Parameters:
3402 +  B - the matrix
3403 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3404           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3405 .  nz - number of block nonzeros per block row (same for all rows)
3406 -  nnz - array containing the number of block nonzeros in the various block rows
3407          (possibly different for each block row) or NULL
3408 
3409    Options Database Keys:
3410 .   -mat_no_unroll - uses code that does not unroll the loops in the
3411                      block calculations (much slower)
3412 .   -mat_block_size - size of the blocks to use
3413 
3414    Level: intermediate
3415 
3416    Notes:
3417    If the nnz parameter is given then the nz parameter is ignored
3418 
3419    You can call MatGetInfo() to get information on how effective the preallocation was;
3420    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3421    You can also run with the option -info and look for messages with the string
3422    malloc in them to see if additional memory allocation was needed.
3423 
3424    The block AIJ format is fully compatible with standard Fortran 77
3425    storage.  That is, the stored row and column indices can begin at
3426    either one (as in Fortran) or zero.  See the users' manual for details.
3427 
3428    Specify the preallocated storage with either nz or nnz (not both).
3429    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3430    allocation.  See Users-Manual: ch_mat for details.
3431 
3432 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3433 @*/
3434 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3435 {
3436   PetscErrorCode ierr;
3437 
3438   PetscFunctionBegin;
3439   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3440   PetscValidType(B,1);
3441   PetscValidLogicalCollectiveInt(B,bs,2);
3442   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3443   PetscFunctionReturn(0);
3444 }
3445 
3446 /*@C
3447    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3448    (the default sequential PETSc format).
3449 
3450    Collective on MPI_Comm
3451 
3452    Input Parameters:
3453 +  B - the matrix
3454 .  i - the indices into j for the start of each local row (starts with zero)
3455 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3456 -  v - optional values in the matrix
3457 
3458    Level: developer
3459 
3460    Notes:
3461    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3462    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3463    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3464    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3465    block column and the second index is over columns within a block.
3466 
3467 .keywords: matrix, aij, compressed row, sparse
3468 
3469 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3470 @*/
3471 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3472 {
3473   PetscErrorCode ierr;
3474 
3475   PetscFunctionBegin;
3476   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3477   PetscValidType(B,1);
3478   PetscValidLogicalCollectiveInt(B,bs,2);
3479   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3480   PetscFunctionReturn(0);
3481 }
3482 
3483 
3484 /*@
3485      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3486 
3487      Collective on MPI_Comm
3488 
3489    Input Parameters:
3490 +  comm - must be an MPI communicator of size 1
3491 .  bs - size of block
3492 .  m - number of rows
3493 .  n - number of columns
3494 .  i - row indices
3495 .  j - column indices
3496 -  a - matrix values
3497 
3498    Output Parameter:
3499 .  mat - the matrix
3500 
3501    Level: advanced
3502 
3503    Notes:
3504        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3505     once the matrix is destroyed
3506 
3507        You cannot set new nonzero locations into this matrix, that will generate an error.
3508 
3509        The i and j indices are 0 based
3510 
3511        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3512 
3513       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3514       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3515       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3516       with column-major ordering within blocks.
3517 
3518 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3519 
3520 @*/
3521 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3522 {
3523   PetscErrorCode ierr;
3524   PetscInt       ii;
3525   Mat_SeqBAIJ    *baij;
3526 
3527   PetscFunctionBegin;
3528   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3529   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3530 
3531   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3532   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3533   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3534   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3535   baij = (Mat_SeqBAIJ*)(*mat)->data;
3536   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3537   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3538 
3539   baij->i = i;
3540   baij->j = j;
3541   baij->a = a;
3542 
3543   baij->singlemalloc = PETSC_FALSE;
3544   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3545   baij->free_a       = PETSC_FALSE;
3546   baij->free_ij      = PETSC_FALSE;
3547 
3548   for (ii=0; ii<m; ii++) {
3549     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3550 #if defined(PETSC_USE_DEBUG)
3551     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3552 #endif
3553   }
3554 #if defined(PETSC_USE_DEBUG)
3555   for (ii=0; ii<baij->i[m]; ii++) {
3556     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3557     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3558   }
3559 #endif
3560 
3561   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3562   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3563   PetscFunctionReturn(0);
3564 }
3565 
3566 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3567 {
3568   PetscErrorCode ierr;
3569   PetscMPIInt    size;
3570 
3571   PetscFunctionBegin;
3572   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3573   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3574     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3575   } else {
3576     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3577   }
3578   PetscFunctionReturn(0);
3579 }
3580