xref: /petsc/src/mat/impls/baij/seq/baij.c (revision feff33ee0b5b037fa8f9f294dede656a2f85cc47)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,const MatType,MatReuse,Mat*);
17 #endif
18 
19 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
20 {
21   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
22   PetscErrorCode ierr;
23   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
24   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
25   PetscReal      shift = 0.0;
26   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
27 
28   PetscFunctionBegin;
29   allowzeropivot = PetscNot(A->erroriffailure);
30 
31   if (a->idiagvalid) {
32     if (values) *values = a->idiag;
33     PetscFunctionReturn(0);
34   }
35   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
36   diag_offset = a->diag;
37   if (!a->idiag) {
38     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
39     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
40   }
41   diag  = a->idiag;
42   if (values) *values = a->idiag;
43   /* factor and invert each block */
44   switch (bs) {
45   case 1:
46     for (i=0; i<mbs; i++) {
47       odiag    = v + 1*diag_offset[i];
48       diag[0]  = odiag[0];
49 
50       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
51         if (allowzeropivot) {
52           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
53           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
54           A->factorerror_zeropivot_row   = i;
55           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
56         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
57       }
58 
59       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
60       diag    += 1;
61     }
62     break;
63   case 2:
64     for (i=0; i<mbs; i++) {
65       odiag    = v + 4*diag_offset[i];
66       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
67       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
68       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
69       diag    += 4;
70     }
71     break;
72   case 3:
73     for (i=0; i<mbs; i++) {
74       odiag    = v + 9*diag_offset[i];
75       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
76       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
77       diag[8]  = odiag[8];
78       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
79       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
80       diag    += 9;
81     }
82     break;
83   case 4:
84     for (i=0; i<mbs; i++) {
85       odiag  = v + 16*diag_offset[i];
86       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
87       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
88       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
89       diag  += 16;
90     }
91     break;
92   case 5:
93     for (i=0; i<mbs; i++) {
94       odiag  = v + 25*diag_offset[i];
95       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
97       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
98       diag  += 25;
99     }
100     break;
101   case 6:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 36*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
106       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
107       diag  += 36;
108     }
109     break;
110   case 7:
111     for (i=0; i<mbs; i++) {
112       odiag  = v + 49*diag_offset[i];
113       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
114       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
115       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
116       diag  += 49;
117     }
118     break;
119   default:
120     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
121     for (i=0; i<mbs; i++) {
122       odiag  = v + bs2*diag_offset[i];
123       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
124       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
125       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
126       diag  += bs2;
127     }
128     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
129   }
130   a->idiagvalid = PETSC_TRUE;
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
135 {
136   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
137   PetscScalar       *x,*work,*w,*workt,*t;
138   const MatScalar   *v,*aa = a->a, *idiag;
139   const PetscScalar *b,*xb;
140   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
141   PetscErrorCode    ierr;
142   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
143   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
144 
145   PetscFunctionBegin;
146   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
147   its = its*lits;
148   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
149   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
150   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
151   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
152   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
153 
154   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
155 
156   if (!m) PetscFunctionReturn(0);
157   diag  = a->diag;
158   idiag = a->idiag;
159   k    = PetscMax(A->rmap->n,A->cmap->n);
160   if (!a->mult_work) {
161     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
162   }
163   if (!a->sor_workt) {
164     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
165   }
166   if (!a->sor_work) {
167     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
168   }
169   work = a->mult_work;
170   t    = a->sor_workt;
171   w    = a->sor_work;
172 
173   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
174   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
175 
176   if (flag & SOR_ZERO_INITIAL_GUESS) {
177     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
178       switch (bs) {
179       case 1:
180         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
181         t[0] = b[0];
182         i2     = 1;
183         idiag += 1;
184         for (i=1; i<m; i++) {
185           v  = aa + ai[i];
186           vi = aj + ai[i];
187           nz = diag[i] - ai[i];
188           s[0] = b[i2];
189           for (j=0; j<nz; j++) {
190             xw[0] = x[vi[j]];
191             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
192           }
193           t[i2] = s[0];
194           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
195           x[i2]  = xw[0];
196           idiag += 1;
197           i2    += 1;
198         }
199         break;
200       case 2:
201         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
202         t[0] = b[0]; t[1] = b[1];
203         i2     = 2;
204         idiag += 4;
205         for (i=1; i<m; i++) {
206           v  = aa + 4*ai[i];
207           vi = aj + ai[i];
208           nz = diag[i] - ai[i];
209           s[0] = b[i2]; s[1] = b[i2+1];
210           for (j=0; j<nz; j++) {
211             idx = 2*vi[j];
212             it  = 4*j;
213             xw[0] = x[idx]; xw[1] = x[1+idx];
214             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
215           }
216           t[i2] = s[0]; t[i2+1] = s[1];
217           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
218           x[i2]   = xw[0]; x[i2+1] = xw[1];
219           idiag  += 4;
220           i2     += 2;
221         }
222         break;
223       case 3:
224         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
225         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
226         i2     = 3;
227         idiag += 9;
228         for (i=1; i<m; i++) {
229           v  = aa + 9*ai[i];
230           vi = aj + ai[i];
231           nz = diag[i] - ai[i];
232           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
233           while (nz--) {
234             idx = 3*(*vi++);
235             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
236             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
237             v  += 9;
238           }
239           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
240           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
241           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
242           idiag  += 9;
243           i2     += 3;
244         }
245         break;
246       case 4:
247         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
248         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
249         i2     = 4;
250         idiag += 16;
251         for (i=1; i<m; i++) {
252           v  = aa + 16*ai[i];
253           vi = aj + ai[i];
254           nz = diag[i] - ai[i];
255           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
256           while (nz--) {
257             idx = 4*(*vi++);
258             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
259             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
260             v  += 16;
261           }
262           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
263           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
264           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
265           idiag  += 16;
266           i2     += 4;
267         }
268         break;
269       case 5:
270         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
271         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
272         i2     = 5;
273         idiag += 25;
274         for (i=1; i<m; i++) {
275           v  = aa + 25*ai[i];
276           vi = aj + ai[i];
277           nz = diag[i] - ai[i];
278           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
279           while (nz--) {
280             idx = 5*(*vi++);
281             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
282             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
283             v  += 25;
284           }
285           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
286           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
287           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
288           idiag  += 25;
289           i2     += 5;
290         }
291         break;
292       case 6:
293         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
294         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
295         i2     = 6;
296         idiag += 36;
297         for (i=1; i<m; i++) {
298           v  = aa + 36*ai[i];
299           vi = aj + ai[i];
300           nz = diag[i] - ai[i];
301           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
302           while (nz--) {
303             idx = 6*(*vi++);
304             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
305             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
306             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
307             v  += 36;
308           }
309           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
310           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
311           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
312           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
313           idiag  += 36;
314           i2     += 6;
315         }
316         break;
317       case 7:
318         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
319         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
320         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
321         i2     = 7;
322         idiag += 49;
323         for (i=1; i<m; i++) {
324           v  = aa + 49*ai[i];
325           vi = aj + ai[i];
326           nz = diag[i] - ai[i];
327           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
328           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
329           while (nz--) {
330             idx = 7*(*vi++);
331             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
332             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
333             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
334             v  += 49;
335           }
336           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
337           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
338           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
339           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
340           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
341           idiag  += 49;
342           i2     += 7;
343         }
344         break;
345       default:
346         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
347         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
348         i2     = bs;
349         idiag += bs2;
350         for (i=1; i<m; i++) {
351           v  = aa + bs2*ai[i];
352           vi = aj + ai[i];
353           nz = diag[i] - ai[i];
354 
355           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
356           /* copy all rows of x that are needed into contiguous space */
357           workt = work;
358           for (j=0; j<nz; j++) {
359             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
360             workt += bs;
361           }
362           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
363           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
364           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
365 
366           idiag += bs2;
367           i2    += bs;
368         }
369         break;
370       }
371       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
372       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
373       xb = t;
374     }
375     else xb = b;
376     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
377       idiag = a->idiag+bs2*(a->mbs-1);
378       i2 = bs * (m-1);
379       switch (bs) {
380       case 1:
381         s[0]  = xb[i2];
382         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
383         x[i2] = xw[0];
384         i2   -= 1;
385         for (i=m-2; i>=0; i--) {
386           v  = aa + (diag[i]+1);
387           vi = aj + diag[i] + 1;
388           nz = ai[i+1] - diag[i] - 1;
389           s[0] = xb[i2];
390           for (j=0; j<nz; j++) {
391             xw[0] = x[vi[j]];
392             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
393           }
394           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
395           x[i2]  = xw[0];
396           idiag -= 1;
397           i2    -= 1;
398         }
399         break;
400       case 2:
401         s[0]  = xb[i2]; s[1] = xb[i2+1];
402         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
403         x[i2] = xw[0]; x[i2+1] = xw[1];
404         i2    -= 2;
405         idiag -= 4;
406         for (i=m-2; i>=0; i--) {
407           v  = aa + 4*(diag[i] + 1);
408           vi = aj + diag[i] + 1;
409           nz = ai[i+1] - diag[i] - 1;
410           s[0] = xb[i2]; s[1] = xb[i2+1];
411           for (j=0; j<nz; j++) {
412             idx = 2*vi[j];
413             it  = 4*j;
414             xw[0] = x[idx]; xw[1] = x[1+idx];
415             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
416           }
417           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
418           x[i2]   = xw[0]; x[i2+1] = xw[1];
419           idiag  -= 4;
420           i2     -= 2;
421         }
422         break;
423       case 3:
424         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
425         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
426         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
427         i2    -= 3;
428         idiag -= 9;
429         for (i=m-2; i>=0; i--) {
430           v  = aa + 9*(diag[i]+1);
431           vi = aj + diag[i] + 1;
432           nz = ai[i+1] - diag[i] - 1;
433           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
434           while (nz--) {
435             idx = 3*(*vi++);
436             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
437             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
438             v  += 9;
439           }
440           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
441           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
442           idiag  -= 9;
443           i2     -= 3;
444         }
445         break;
446       case 4:
447         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
448         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
449         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
450         i2    -= 4;
451         idiag -= 16;
452         for (i=m-2; i>=0; i--) {
453           v  = aa + 16*(diag[i]+1);
454           vi = aj + diag[i] + 1;
455           nz = ai[i+1] - diag[i] - 1;
456           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
457           while (nz--) {
458             idx = 4*(*vi++);
459             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
460             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
461             v  += 16;
462           }
463           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
464           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
465           idiag  -= 16;
466           i2     -= 4;
467         }
468         break;
469       case 5:
470         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
471         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
472         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
473         i2    -= 5;
474         idiag -= 25;
475         for (i=m-2; i>=0; i--) {
476           v  = aa + 25*(diag[i]+1);
477           vi = aj + diag[i] + 1;
478           nz = ai[i+1] - diag[i] - 1;
479           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
480           while (nz--) {
481             idx = 5*(*vi++);
482             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
483             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
484             v  += 25;
485           }
486           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
487           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
488           idiag  -= 25;
489           i2     -= 5;
490         }
491         break;
492       case 6:
493         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
494         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
495         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
496         i2    -= 6;
497         idiag -= 36;
498         for (i=m-2; i>=0; i--) {
499           v  = aa + 36*(diag[i]+1);
500           vi = aj + diag[i] + 1;
501           nz = ai[i+1] - diag[i] - 1;
502           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
503           while (nz--) {
504             idx = 6*(*vi++);
505             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
506             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
507             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
508             v  += 36;
509           }
510           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
511           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
512           idiag  -= 36;
513           i2     -= 6;
514         }
515         break;
516       case 7:
517         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
518         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
519         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
520         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
521         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
522         i2    -= 7;
523         idiag -= 49;
524         for (i=m-2; i>=0; i--) {
525           v  = aa + 49*(diag[i]+1);
526           vi = aj + diag[i] + 1;
527           nz = ai[i+1] - diag[i] - 1;
528           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
529           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
530           while (nz--) {
531             idx = 7*(*vi++);
532             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
533             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
534             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
535             v  += 49;
536           }
537           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
538           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
539           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
540           idiag  -= 49;
541           i2     -= 7;
542         }
543         break;
544       default:
545         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
546         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
547         i2    -= bs;
548         idiag -= bs2;
549         for (i=m-2; i>=0; i--) {
550           v  = aa + bs2*(diag[i]+1);
551           vi = aj + diag[i] + 1;
552           nz = ai[i+1] - diag[i] - 1;
553 
554           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
555           /* copy all rows of x that are needed into contiguous space */
556           workt = work;
557           for (j=0; j<nz; j++) {
558             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
559             workt += bs;
560           }
561           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
562           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
563 
564           idiag -= bs2;
565           i2    -= bs;
566         }
567         break;
568       }
569       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
570     }
571     its--;
572   }
573   while (its--) {
574     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
575       idiag = a->idiag;
576       i2 = 0;
577       switch (bs) {
578       case 1:
579         for (i=0; i<m; i++) {
580           v  = aa + ai[i];
581           vi = aj + ai[i];
582           nz = ai[i+1] - ai[i];
583           s[0] = b[i2];
584           for (j=0; j<nz; j++) {
585             xw[0] = x[vi[j]];
586             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
587           }
588           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
589           x[i2] += xw[0];
590           idiag += 1;
591           i2    += 1;
592         }
593         break;
594       case 2:
595         for (i=0; i<m; i++) {
596           v  = aa + 4*ai[i];
597           vi = aj + ai[i];
598           nz = ai[i+1] - ai[i];
599           s[0] = b[i2]; s[1] = b[i2+1];
600           for (j=0; j<nz; j++) {
601             idx = 2*vi[j];
602             it  = 4*j;
603             xw[0] = x[idx]; xw[1] = x[1+idx];
604             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
605           }
606           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
607           x[i2]  += xw[0]; x[i2+1] += xw[1];
608           idiag  += 4;
609           i2     += 2;
610         }
611         break;
612       case 3:
613         for (i=0; i<m; i++) {
614           v  = aa + 9*ai[i];
615           vi = aj + ai[i];
616           nz = ai[i+1] - ai[i];
617           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
618           while (nz--) {
619             idx = 3*(*vi++);
620             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
621             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
622             v  += 9;
623           }
624           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
625           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
626           idiag  += 9;
627           i2     += 3;
628         }
629         break;
630       case 4:
631         for (i=0; i<m; i++) {
632           v  = aa + 16*ai[i];
633           vi = aj + ai[i];
634           nz = ai[i+1] - ai[i];
635           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
636           while (nz--) {
637             idx = 4*(*vi++);
638             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
639             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
640             v  += 16;
641           }
642           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
643           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
644           idiag  += 16;
645           i2     += 4;
646         }
647         break;
648       case 5:
649         for (i=0; i<m; i++) {
650           v  = aa + 25*ai[i];
651           vi = aj + ai[i];
652           nz = ai[i+1] - ai[i];
653           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
654           while (nz--) {
655             idx = 5*(*vi++);
656             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
657             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
658             v  += 25;
659           }
660           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
661           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
662           idiag  += 25;
663           i2     += 5;
664         }
665         break;
666       case 6:
667         for (i=0; i<m; i++) {
668           v  = aa + 36*ai[i];
669           vi = aj + ai[i];
670           nz = ai[i+1] - ai[i];
671           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
672           while (nz--) {
673             idx = 6*(*vi++);
674             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
675             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
676             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
677             v  += 36;
678           }
679           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
680           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
681           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
682           idiag  += 36;
683           i2     += 6;
684         }
685         break;
686       case 7:
687         for (i=0; i<m; i++) {
688           v  = aa + 49*ai[i];
689           vi = aj + ai[i];
690           nz = ai[i+1] - ai[i];
691           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
692           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
693           while (nz--) {
694             idx = 7*(*vi++);
695             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
696             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
697             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
698             v  += 49;
699           }
700           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
701           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
702           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
703           idiag  += 49;
704           i2     += 7;
705         }
706         break;
707       default:
708         for (i=0; i<m; i++) {
709           v  = aa + bs2*ai[i];
710           vi = aj + ai[i];
711           nz = ai[i+1] - ai[i];
712 
713           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
714           /* copy all rows of x that are needed into contiguous space */
715           workt = work;
716           for (j=0; j<nz; j++) {
717             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
718             workt += bs;
719           }
720           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
721           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
722 
723           idiag += bs2;
724           i2    += bs;
725         }
726         break;
727       }
728       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
729     }
730     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
731       idiag = a->idiag+bs2*(a->mbs-1);
732       i2 = bs * (m-1);
733       switch (bs) {
734       case 1:
735         for (i=m-1; i>=0; i--) {
736           v  = aa + ai[i];
737           vi = aj + ai[i];
738           nz = ai[i+1] - ai[i];
739           s[0] = b[i2];
740           for (j=0; j<nz; j++) {
741             xw[0] = x[vi[j]];
742             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
743           }
744           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
745           x[i2] += xw[0];
746           idiag -= 1;
747           i2    -= 1;
748         }
749         break;
750       case 2:
751         for (i=m-1; i>=0; i--) {
752           v  = aa + 4*ai[i];
753           vi = aj + ai[i];
754           nz = ai[i+1] - ai[i];
755           s[0] = b[i2]; s[1] = b[i2+1];
756           for (j=0; j<nz; j++) {
757             idx = 2*vi[j];
758             it  = 4*j;
759             xw[0] = x[idx]; xw[1] = x[1+idx];
760             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
761           }
762           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
763           x[i2]  += xw[0]; x[i2+1] += xw[1];
764           idiag  -= 4;
765           i2     -= 2;
766         }
767         break;
768       case 3:
769         for (i=m-1; i>=0; i--) {
770           v  = aa + 9*ai[i];
771           vi = aj + ai[i];
772           nz = ai[i+1] - ai[i];
773           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
774           while (nz--) {
775             idx = 3*(*vi++);
776             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
777             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
778             v  += 9;
779           }
780           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
781           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
782           idiag  -= 9;
783           i2     -= 3;
784         }
785         break;
786       case 4:
787         for (i=m-1; i>=0; i--) {
788           v  = aa + 16*ai[i];
789           vi = aj + ai[i];
790           nz = ai[i+1] - ai[i];
791           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
792           while (nz--) {
793             idx = 4*(*vi++);
794             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
795             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
796             v  += 16;
797           }
798           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
799           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
800           idiag  -= 16;
801           i2     -= 4;
802         }
803         break;
804       case 5:
805         for (i=m-1; i>=0; i--) {
806           v  = aa + 25*ai[i];
807           vi = aj + ai[i];
808           nz = ai[i+1] - ai[i];
809           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
810           while (nz--) {
811             idx = 5*(*vi++);
812             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
813             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
814             v  += 25;
815           }
816           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
817           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
818           idiag  -= 25;
819           i2     -= 5;
820         }
821         break;
822       case 6:
823         for (i=m-1; i>=0; i--) {
824           v  = aa + 36*ai[i];
825           vi = aj + ai[i];
826           nz = ai[i+1] - ai[i];
827           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
828           while (nz--) {
829             idx = 6*(*vi++);
830             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
831             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
832             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
833             v  += 36;
834           }
835           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
836           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
837           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
838           idiag  -= 36;
839           i2     -= 6;
840         }
841         break;
842       case 7:
843         for (i=m-1; i>=0; i--) {
844           v  = aa + 49*ai[i];
845           vi = aj + ai[i];
846           nz = ai[i+1] - ai[i];
847           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
848           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
849           while (nz--) {
850             idx = 7*(*vi++);
851             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
852             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
853             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
854             v  += 49;
855           }
856           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
857           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
858           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
859           idiag  -= 49;
860           i2     -= 7;
861         }
862         break;
863       default:
864         for (i=m-1; i>=0; i--) {
865           v  = aa + bs2*ai[i];
866           vi = aj + ai[i];
867           nz = ai[i+1] - ai[i];
868 
869           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
870           /* copy all rows of x that are needed into contiguous space */
871           workt = work;
872           for (j=0; j<nz; j++) {
873             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
874             workt += bs;
875           }
876           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
877           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
878 
879           idiag -= bs2;
880           i2    -= bs;
881         }
882         break;
883       }
884       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
885     }
886   }
887   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
888   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
889   PetscFunctionReturn(0);
890 }
891 
892 
893 /*
894     Special version for direct calls from Fortran (Used in PETSc-fun3d)
895 */
896 #if defined(PETSC_HAVE_FORTRAN_CAPS)
897 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
898 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
899 #define matsetvaluesblocked4_ matsetvaluesblocked4
900 #endif
901 
902 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
903 {
904   Mat               A  = *AA;
905   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
906   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
907   PetscInt          *ai    =a->i,*ailen=a->ilen;
908   PetscInt          *aj    =a->j,stepval,lastcol = -1;
909   const PetscScalar *value = v;
910   MatScalar         *ap,*aa = a->a,*bap;
911 
912   PetscFunctionBegin;
913   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
914   stepval = (n-1)*4;
915   for (k=0; k<m; k++) { /* loop over added rows */
916     row  = im[k];
917     rp   = aj + ai[row];
918     ap   = aa + 16*ai[row];
919     nrow = ailen[row];
920     low  = 0;
921     high = nrow;
922     for (l=0; l<n; l++) { /* loop over added columns */
923       col = in[l];
924       if (col <= lastcol)  low = 0;
925       else                high = nrow;
926       lastcol = col;
927       value   = v + k*(stepval+4 + l)*4;
928       while (high-low > 7) {
929         t = (low+high)/2;
930         if (rp[t] > col) high = t;
931         else             low  = t;
932       }
933       for (i=low; i<high; i++) {
934         if (rp[i] > col) break;
935         if (rp[i] == col) {
936           bap = ap +  16*i;
937           for (ii=0; ii<4; ii++,value+=stepval) {
938             for (jj=ii; jj<16; jj+=4) {
939               bap[jj] += *value++;
940             }
941           }
942           goto noinsert2;
943         }
944       }
945       N = nrow++ - 1;
946       high++; /* added new column index thus must search to one higher than before */
947       /* shift up all the later entries in this row */
948       for (ii=N; ii>=i; ii--) {
949         rp[ii+1] = rp[ii];
950         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
951       }
952       if (N >= i) {
953         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
954       }
955       rp[i] = col;
956       bap   = ap +  16*i;
957       for (ii=0; ii<4; ii++,value+=stepval) {
958         for (jj=ii; jj<16; jj+=4) {
959           bap[jj] = *value++;
960         }
961       }
962       noinsert2:;
963       low = i;
964     }
965     ailen[row] = nrow;
966   }
967   PetscFunctionReturnVoid();
968 }
969 
970 #if defined(PETSC_HAVE_FORTRAN_CAPS)
971 #define matsetvalues4_ MATSETVALUES4
972 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
973 #define matsetvalues4_ matsetvalues4
974 #endif
975 
976 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
977 {
978   Mat         A  = *AA;
979   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
980   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
981   PetscInt    *ai=a->i,*ailen=a->ilen;
982   PetscInt    *aj=a->j,brow,bcol;
983   PetscInt    ridx,cidx,lastcol = -1;
984   MatScalar   *ap,value,*aa=a->a,*bap;
985 
986   PetscFunctionBegin;
987   for (k=0; k<m; k++) { /* loop over added rows */
988     row  = im[k]; brow = row/4;
989     rp   = aj + ai[brow];
990     ap   = aa + 16*ai[brow];
991     nrow = ailen[brow];
992     low  = 0;
993     high = nrow;
994     for (l=0; l<n; l++) { /* loop over added columns */
995       col   = in[l]; bcol = col/4;
996       ridx  = row % 4; cidx = col % 4;
997       value = v[l + k*n];
998       if (col <= lastcol)  low = 0;
999       else                high = nrow;
1000       lastcol = col;
1001       while (high-low > 7) {
1002         t = (low+high)/2;
1003         if (rp[t] > bcol) high = t;
1004         else              low  = t;
1005       }
1006       for (i=low; i<high; i++) {
1007         if (rp[i] > bcol) break;
1008         if (rp[i] == bcol) {
1009           bap   = ap +  16*i + 4*cidx + ridx;
1010           *bap += value;
1011           goto noinsert1;
1012         }
1013       }
1014       N = nrow++ - 1;
1015       high++; /* added new column thus must search to one higher than before */
1016       /* shift up all the later entries in this row */
1017       for (ii=N; ii>=i; ii--) {
1018         rp[ii+1] = rp[ii];
1019         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1020       }
1021       if (N>=i) {
1022         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1023       }
1024       rp[i]                    = bcol;
1025       ap[16*i + 4*cidx + ridx] = value;
1026 noinsert1:;
1027       low = i;
1028     }
1029     ailen[brow] = nrow;
1030   }
1031   PetscFunctionReturnVoid();
1032 }
1033 
1034 /*
1035      Checks for missing diagonals
1036 */
1037 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1038 {
1039   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1040   PetscErrorCode ierr;
1041   PetscInt       *diag,*ii = a->i,i;
1042 
1043   PetscFunctionBegin;
1044   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1045   *missing = PETSC_FALSE;
1046   if (A->rmap->n > 0 && !ii) {
1047     *missing = PETSC_TRUE;
1048     if (d) *d = 0;
1049     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1050   } else {
1051     diag = a->diag;
1052     for (i=0; i<a->mbs; i++) {
1053       if (diag[i] >= ii[i+1]) {
1054         *missing = PETSC_TRUE;
1055         if (d) *d = i;
1056         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1057         break;
1058       }
1059     }
1060   }
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1065 {
1066   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1067   PetscErrorCode ierr;
1068   PetscInt       i,j,m = a->mbs;
1069 
1070   PetscFunctionBegin;
1071   if (!a->diag) {
1072     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1073     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1074     a->free_diag = PETSC_TRUE;
1075   }
1076   for (i=0; i<m; i++) {
1077     a->diag[i] = a->i[i+1];
1078     for (j=a->i[i]; j<a->i[i+1]; j++) {
1079       if (a->j[j] == i) {
1080         a->diag[i] = j;
1081         break;
1082       }
1083     }
1084   }
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 
1089 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1090 {
1091   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1092   PetscErrorCode ierr;
1093   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1094   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1095 
1096   PetscFunctionBegin;
1097   *nn = n;
1098   if (!ia) PetscFunctionReturn(0);
1099   if (symmetric) {
1100     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1101     nz   = tia[n];
1102   } else {
1103     tia = a->i; tja = a->j;
1104   }
1105 
1106   if (!blockcompressed && bs > 1) {
1107     (*nn) *= bs;
1108     /* malloc & create the natural set of indices */
1109     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1110     if (n) {
1111       (*ia)[0] = oshift;
1112       for (j=1; j<bs; j++) {
1113         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1114       }
1115     }
1116 
1117     for (i=1; i<n; i++) {
1118       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1119       for (j=1; j<bs; j++) {
1120         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1121       }
1122     }
1123     if (n) {
1124       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1125     }
1126 
1127     if (inja) {
1128       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1129       cnt = 0;
1130       for (i=0; i<n; i++) {
1131         for (j=0; j<bs; j++) {
1132           for (k=tia[i]; k<tia[i+1]; k++) {
1133             for (l=0; l<bs; l++) {
1134               (*ja)[cnt++] = bs*tja[k] + l;
1135             }
1136           }
1137         }
1138       }
1139     }
1140 
1141     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1142       ierr = PetscFree(tia);CHKERRQ(ierr);
1143       ierr = PetscFree(tja);CHKERRQ(ierr);
1144     }
1145   } else if (oshift == 1) {
1146     if (symmetric) {
1147       nz = tia[A->rmap->n/bs];
1148       /*  add 1 to i and j indices */
1149       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1150       *ia = tia;
1151       if (ja) {
1152         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1153         *ja = tja;
1154       }
1155     } else {
1156       nz = a->i[A->rmap->n/bs];
1157       /* malloc space and  add 1 to i and j indices */
1158       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1159       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1160       if (ja) {
1161         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1162         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1163       }
1164     }
1165   } else {
1166     *ia = tia;
1167     if (ja) *ja = tja;
1168   }
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1173 {
1174   PetscErrorCode ierr;
1175 
1176   PetscFunctionBegin;
1177   if (!ia) PetscFunctionReturn(0);
1178   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1179     ierr = PetscFree(*ia);CHKERRQ(ierr);
1180     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1181   }
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1186 {
1187   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1188   PetscErrorCode ierr;
1189 
1190   PetscFunctionBegin;
1191 #if defined(PETSC_USE_LOG)
1192   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1193 #endif
1194   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1195   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1196   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1197   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1198   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1199   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1200   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1201   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1202   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1203   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1204   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1205   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1206   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1207 
1208   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1209   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1210   ierr = PetscFree(A->data);CHKERRQ(ierr);
1211 
1212   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1213   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1214   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1223 #if defined(PETSC_HAVE_HYPRE)
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1225 #endif
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1230 {
1231   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1232   PetscErrorCode ierr;
1233 
1234   PetscFunctionBegin;
1235   switch (op) {
1236   case MAT_ROW_ORIENTED:
1237     a->roworiented = flg;
1238     break;
1239   case MAT_KEEP_NONZERO_PATTERN:
1240     a->keepnonzeropattern = flg;
1241     break;
1242   case MAT_NEW_NONZERO_LOCATIONS:
1243     a->nonew = (flg ? 0 : 1);
1244     break;
1245   case MAT_NEW_NONZERO_LOCATION_ERR:
1246     a->nonew = (flg ? -1 : 0);
1247     break;
1248   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1249     a->nonew = (flg ? -2 : 0);
1250     break;
1251   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1252     a->nounused = (flg ? -1 : 0);
1253     break;
1254   case MAT_NEW_DIAGONALS:
1255   case MAT_IGNORE_OFF_PROC_ENTRIES:
1256   case MAT_USE_HASH_TABLE:
1257     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1258     break;
1259   case MAT_SPD:
1260   case MAT_SYMMETRIC:
1261   case MAT_STRUCTURALLY_SYMMETRIC:
1262   case MAT_HERMITIAN:
1263   case MAT_SYMMETRY_ETERNAL:
1264   case MAT_SUBMAT_SINGLEIS:
1265   case MAT_STRUCTURE_ONLY:
1266     /* These options are handled directly by MatSetOption() */
1267     break;
1268   default:
1269     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1270   }
1271   PetscFunctionReturn(0);
1272 }
1273 
1274 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1275 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1276 {
1277   PetscErrorCode ierr;
1278   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1279   MatScalar      *aa_i;
1280   PetscScalar    *v_i;
1281 
1282   PetscFunctionBegin;
1283   bs  = A->rmap->bs;
1284   bs2 = bs*bs;
1285   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1286 
1287   bn  = row/bs;   /* Block number */
1288   bp  = row % bs; /* Block Position */
1289   M   = ai[bn+1] - ai[bn];
1290   *nz = bs*M;
1291 
1292   if (v) {
1293     *v = 0;
1294     if (*nz) {
1295       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1296       for (i=0; i<M; i++) { /* for each block in the block row */
1297         v_i  = *v + i*bs;
1298         aa_i = aa + bs2*(ai[bn] + i);
1299         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1300       }
1301     }
1302   }
1303 
1304   if (idx) {
1305     *idx = 0;
1306     if (*nz) {
1307       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1308       for (i=0; i<M; i++) { /* for each block in the block row */
1309         idx_i = *idx + i*bs;
1310         itmp  = bs*aj[ai[bn] + i];
1311         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1312       }
1313     }
1314   }
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1319 {
1320   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1321   PetscErrorCode ierr;
1322 
1323   PetscFunctionBegin;
1324   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1325   PetscFunctionReturn(0);
1326 }
1327 
1328 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1329 {
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1334   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1339 {
1340   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1341   Mat            C;
1342   PetscErrorCode ierr;
1343   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1344   PetscInt       *rows,*cols,bs2=a->bs2;
1345   MatScalar      *array;
1346 
1347   PetscFunctionBegin;
1348   if (reuse == MAT_INPLACE_MATRIX && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1349   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1350     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1351 
1352     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1353     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1354     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1355     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1356     ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr);
1357     ierr = PetscFree(col);CHKERRQ(ierr);
1358   } else {
1359     C = *B;
1360   }
1361 
1362   array = a->a;
1363   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1364   for (i=0; i<mbs; i++) {
1365     cols[0] = i*bs;
1366     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1367     len = ai[i+1] - ai[i];
1368     for (j=0; j<len; j++) {
1369       rows[0] = (*aj++)*bs;
1370       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1371       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1372       array += bs2;
1373     }
1374   }
1375   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1376 
1377   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1378   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1379 
1380   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1381     *B = C;
1382   } else {
1383     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1384   }
1385   PetscFunctionReturn(0);
1386 }
1387 
1388 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1389 {
1390   PetscErrorCode ierr;
1391   Mat            Btrans;
1392 
1393   PetscFunctionBegin;
1394   *f   = PETSC_FALSE;
1395   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1396   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1397   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1402 {
1403   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1404   PetscErrorCode ierr;
1405   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1406   int            fd;
1407   PetscScalar    *aa;
1408   FILE           *file;
1409 
1410   PetscFunctionBegin;
1411   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1412   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1413   col_lens[0] = MAT_FILE_CLASSID;
1414 
1415   col_lens[1] = A->rmap->N;
1416   col_lens[2] = A->cmap->n;
1417   col_lens[3] = a->nz*bs2;
1418 
1419   /* store lengths of each row and write (including header) to file */
1420   count = 0;
1421   for (i=0; i<a->mbs; i++) {
1422     for (j=0; j<bs; j++) {
1423       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1424     }
1425   }
1426   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1427   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1428 
1429   /* store column indices (zero start index) */
1430   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1431   count = 0;
1432   for (i=0; i<a->mbs; i++) {
1433     for (j=0; j<bs; j++) {
1434       for (k=a->i[i]; k<a->i[i+1]; k++) {
1435         for (l=0; l<bs; l++) {
1436           jj[count++] = bs*a->j[k] + l;
1437         }
1438       }
1439     }
1440   }
1441   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1442   ierr = PetscFree(jj);CHKERRQ(ierr);
1443 
1444   /* store nonzero values */
1445   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1446   count = 0;
1447   for (i=0; i<a->mbs; i++) {
1448     for (j=0; j<bs; j++) {
1449       for (k=a->i[i]; k<a->i[i+1]; k++) {
1450         for (l=0; l<bs; l++) {
1451           aa[count++] = a->a[bs2*k + l*bs + j];
1452         }
1453       }
1454     }
1455   }
1456   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1457   ierr = PetscFree(aa);CHKERRQ(ierr);
1458 
1459   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1460   if (file) {
1461     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1462   }
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1467 {
1468   PetscErrorCode ierr;
1469   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1470   PetscInt       i,bs = A->rmap->bs,k;
1471 
1472   PetscFunctionBegin;
1473   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1474   for (i=0; i<a->mbs; i++) {
1475     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1476     for (k=a->i[i]; k<a->i[i+1]; k++) {
1477       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1478     }
1479     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1480   }
1481   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1486 {
1487   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1488   PetscErrorCode    ierr;
1489   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1490   PetscViewerFormat format;
1491 
1492   PetscFunctionBegin;
1493   if (A->structure_only) {
1494     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1495     PetscFunctionReturn(0);
1496   }
1497 
1498   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1499   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1500     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1501   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1502     const char *matname;
1503     Mat        aij;
1504     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1505     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1506     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1507     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1508     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1509   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1510       PetscFunctionReturn(0);
1511   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1512     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1513     for (i=0; i<a->mbs; i++) {
1514       for (j=0; j<bs; j++) {
1515         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1516         for (k=a->i[i]; k<a->i[i+1]; k++) {
1517           for (l=0; l<bs; l++) {
1518 #if defined(PETSC_USE_COMPLEX)
1519             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1520               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1521                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1522             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1523               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1524                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1525             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1526               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1527             }
1528 #else
1529             if (a->a[bs2*k + l*bs + j] != 0.0) {
1530               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1531             }
1532 #endif
1533           }
1534         }
1535         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1536       }
1537     }
1538     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1539   } else {
1540     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1541     for (i=0; i<a->mbs; i++) {
1542       for (j=0; j<bs; j++) {
1543         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1544         for (k=a->i[i]; k<a->i[i+1]; k++) {
1545           for (l=0; l<bs; l++) {
1546 #if defined(PETSC_USE_COMPLEX)
1547             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1548               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1549                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1550             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1551               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1552                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1553             } else {
1554               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1555             }
1556 #else
1557             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1558 #endif
1559           }
1560         }
1561         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1562       }
1563     }
1564     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1565   }
1566   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 #include <petscdraw.h>
1571 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1572 {
1573   Mat               A = (Mat) Aa;
1574   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1575   PetscErrorCode    ierr;
1576   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1577   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1578   MatScalar         *aa;
1579   PetscViewer       viewer;
1580   PetscViewerFormat format;
1581 
1582   PetscFunctionBegin;
1583   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1584   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1585   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1586 
1587   /* loop over matrix elements drawing boxes */
1588 
1589   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1590     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1591     /* Blue for negative, Cyan for zero and  Red for positive */
1592     color = PETSC_DRAW_BLUE;
1593     for (i=0,row=0; i<mbs; i++,row+=bs) {
1594       for (j=a->i[i]; j<a->i[i+1]; j++) {
1595         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1596         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1597         aa  = a->a + j*bs2;
1598         for (k=0; k<bs; k++) {
1599           for (l=0; l<bs; l++) {
1600             if (PetscRealPart(*aa++) >=  0.) continue;
1601             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1602           }
1603         }
1604       }
1605     }
1606     color = PETSC_DRAW_CYAN;
1607     for (i=0,row=0; i<mbs; i++,row+=bs) {
1608       for (j=a->i[i]; j<a->i[i+1]; j++) {
1609         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1610         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1611         aa  = a->a + j*bs2;
1612         for (k=0; k<bs; k++) {
1613           for (l=0; l<bs; l++) {
1614             if (PetscRealPart(*aa++) != 0.) continue;
1615             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1616           }
1617         }
1618       }
1619     }
1620     color = PETSC_DRAW_RED;
1621     for (i=0,row=0; i<mbs; i++,row+=bs) {
1622       for (j=a->i[i]; j<a->i[i+1]; j++) {
1623         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1624         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1625         aa  = a->a + j*bs2;
1626         for (k=0; k<bs; k++) {
1627           for (l=0; l<bs; l++) {
1628             if (PetscRealPart(*aa++) <= 0.) continue;
1629             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1630           }
1631         }
1632       }
1633     }
1634     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1635   } else {
1636     /* use contour shading to indicate magnitude of values */
1637     /* first determine max of all nonzero values */
1638     PetscReal minv = 0.0, maxv = 0.0;
1639     PetscDraw popup;
1640 
1641     for (i=0; i<a->nz*a->bs2; i++) {
1642       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1643     }
1644     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1645     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1646     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1647 
1648     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1649     for (i=0,row=0; i<mbs; i++,row+=bs) {
1650       for (j=a->i[i]; j<a->i[i+1]; j++) {
1651         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1652         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1653         aa  = a->a + j*bs2;
1654         for (k=0; k<bs; k++) {
1655           for (l=0; l<bs; l++) {
1656             MatScalar v = *aa++;
1657             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1658             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1659           }
1660         }
1661       }
1662     }
1663     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1664   }
1665   PetscFunctionReturn(0);
1666 }
1667 
1668 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1669 {
1670   PetscErrorCode ierr;
1671   PetscReal      xl,yl,xr,yr,w,h;
1672   PetscDraw      draw;
1673   PetscBool      isnull;
1674 
1675   PetscFunctionBegin;
1676   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1677   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1678   if (isnull) PetscFunctionReturn(0);
1679 
1680   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1681   xr  += w;          yr += h;        xl = -w;     yl = -h;
1682   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1683   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1684   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1685   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1686   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1687   PetscFunctionReturn(0);
1688 }
1689 
1690 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1691 {
1692   PetscErrorCode ierr;
1693   PetscBool      iascii,isbinary,isdraw;
1694 
1695   PetscFunctionBegin;
1696   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1697   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1698   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1699   if (iascii) {
1700     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1701   } else if (isbinary) {
1702     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1703   } else if (isdraw) {
1704     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1705   } else {
1706     Mat B;
1707     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1708     ierr = MatView(B,viewer);CHKERRQ(ierr);
1709     ierr = MatDestroy(&B);CHKERRQ(ierr);
1710   }
1711   PetscFunctionReturn(0);
1712 }
1713 
1714 
1715 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1716 {
1717   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1718   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1719   PetscInt    *ai = a->i,*ailen = a->ilen;
1720   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1721   MatScalar   *ap,*aa = a->a;
1722 
1723   PetscFunctionBegin;
1724   for (k=0; k<m; k++) { /* loop over rows */
1725     row = im[k]; brow = row/bs;
1726     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1727     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1728     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1729     nrow = ailen[brow];
1730     for (l=0; l<n; l++) { /* loop over columns */
1731       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1732       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1733       col  = in[l];
1734       bcol = col/bs;
1735       cidx = col%bs;
1736       ridx = row%bs;
1737       high = nrow;
1738       low  = 0; /* assume unsorted */
1739       while (high-low > 5) {
1740         t = (low+high)/2;
1741         if (rp[t] > bcol) high = t;
1742         else             low  = t;
1743       }
1744       for (i=low; i<high; i++) {
1745         if (rp[i] > bcol) break;
1746         if (rp[i] == bcol) {
1747           *v++ = ap[bs2*i+bs*cidx+ridx];
1748           goto finished;
1749         }
1750       }
1751       *v++ = 0.0;
1752 finished:;
1753     }
1754   }
1755   PetscFunctionReturn(0);
1756 }
1757 
1758 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1759 {
1760   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1761   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1762   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1763   PetscErrorCode    ierr;
1764   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1765   PetscBool         roworiented=a->roworiented;
1766   const PetscScalar *value     = v;
1767   MatScalar         *ap=NULL,*aa = a->a,*bap;
1768 
1769   PetscFunctionBegin;
1770   if (roworiented) {
1771     stepval = (n-1)*bs;
1772   } else {
1773     stepval = (m-1)*bs;
1774   }
1775   for (k=0; k<m; k++) { /* loop over added rows */
1776     row = im[k];
1777     if (row < 0) continue;
1778 #if defined(PETSC_USE_DEBUG)
1779     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1780 #endif
1781     rp   = aj + ai[row];
1782     if (!A->structure_only) ap = aa + bs2*ai[row];
1783     rmax = imax[row];
1784     nrow = ailen[row];
1785     low  = 0;
1786     high = nrow;
1787     for (l=0; l<n; l++) { /* loop over added columns */
1788       if (in[l] < 0) continue;
1789 #if defined(PETSC_USE_DEBUG)
1790       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1791 #endif
1792       col = in[l];
1793       if (!A->structure_only) {
1794         if (roworiented) {
1795           value = v + (k*(stepval+bs) + l)*bs;
1796         } else {
1797           value = v + (l*(stepval+bs) + k)*bs;
1798         }
1799       }
1800       if (col <= lastcol) low = 0;
1801       else high = nrow;
1802       lastcol = col;
1803       while (high-low > 7) {
1804         t = (low+high)/2;
1805         if (rp[t] > col) high = t;
1806         else             low  = t;
1807       }
1808       for (i=low; i<high; i++) {
1809         if (rp[i] > col) break;
1810         if (rp[i] == col) {
1811           if (A->structure_only) goto noinsert2;
1812           bap = ap +  bs2*i;
1813           if (roworiented) {
1814             if (is == ADD_VALUES) {
1815               for (ii=0; ii<bs; ii++,value+=stepval) {
1816                 for (jj=ii; jj<bs2; jj+=bs) {
1817                   bap[jj] += *value++;
1818                 }
1819               }
1820             } else {
1821               for (ii=0; ii<bs; ii++,value+=stepval) {
1822                 for (jj=ii; jj<bs2; jj+=bs) {
1823                   bap[jj] = *value++;
1824                 }
1825               }
1826             }
1827           } else {
1828             if (is == ADD_VALUES) {
1829               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1830                 for (jj=0; jj<bs; jj++) {
1831                   bap[jj] += value[jj];
1832                 }
1833                 bap += bs;
1834               }
1835             } else {
1836               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1837                 for (jj=0; jj<bs; jj++) {
1838                   bap[jj]  = value[jj];
1839                 }
1840                 bap += bs;
1841               }
1842             }
1843           }
1844           goto noinsert2;
1845         }
1846       }
1847       if (nonew == 1) goto noinsert2;
1848       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1849       if (A->structure_only) {
1850         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1851       } else {
1852         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1853       }
1854       N = nrow++ - 1; high++;
1855       /* shift up all the later entries in this row */
1856       for (ii=N; ii>=i; ii--) {
1857         rp[ii+1] = rp[ii];
1858         if (!A->structure_only) {
1859           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1860         }
1861       }
1862       if (N >= i && !A->structure_only) {
1863         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1864       }
1865 
1866       rp[i] = col;
1867       if (!A->structure_only) {
1868         bap   = ap +  bs2*i;
1869         if (roworiented) {
1870           for (ii=0; ii<bs; ii++,value+=stepval) {
1871             for (jj=ii; jj<bs2; jj+=bs) {
1872               bap[jj] = *value++;
1873             }
1874           }
1875         } else {
1876           for (ii=0; ii<bs; ii++,value+=stepval) {
1877             for (jj=0; jj<bs; jj++) {
1878               *bap++ = *value++;
1879             }
1880           }
1881         }
1882       }
1883 noinsert2:;
1884       low = i;
1885     }
1886     ailen[row] = nrow;
1887   }
1888   PetscFunctionReturn(0);
1889 }
1890 
1891 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1892 {
1893   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1894   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1895   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1896   PetscErrorCode ierr;
1897   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1898   MatScalar      *aa  = a->a,*ap;
1899   PetscReal      ratio=0.6;
1900 
1901   PetscFunctionBegin;
1902   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1903 
1904   if (m) rmax = ailen[0];
1905   for (i=1; i<mbs; i++) {
1906     /* move each row back by the amount of empty slots (fshift) before it*/
1907     fshift += imax[i-1] - ailen[i-1];
1908     rmax    = PetscMax(rmax,ailen[i]);
1909     if (fshift) {
1910       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1911       N  = ailen[i];
1912       for (j=0; j<N; j++) {
1913         ip[j-fshift] = ip[j];
1914         if (!A->structure_only) {
1915           ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1916         }
1917       }
1918     }
1919     ai[i] = ai[i-1] + ailen[i-1];
1920   }
1921   if (mbs) {
1922     fshift += imax[mbs-1] - ailen[mbs-1];
1923     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1924   }
1925 
1926   /* reset ilen and imax for each row */
1927   a->nonzerorowcnt = 0;
1928   if (A->structure_only) {
1929     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1930   } else { /* !A->structure_only */
1931     for (i=0; i<mbs; i++) {
1932       ailen[i] = imax[i] = ai[i+1] - ai[i];
1933       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1934     }
1935   }
1936   a->nz = ai[mbs];
1937 
1938   /* diagonals may have moved, so kill the diagonal pointers */
1939   a->idiagvalid = PETSC_FALSE;
1940   if (fshift && a->diag) {
1941     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1942     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1943     a->diag = 0;
1944   }
1945   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1946   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1947   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1948   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1949 
1950   A->info.mallocs    += a->reallocs;
1951   a->reallocs         = 0;
1952   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1953   a->rmax             = rmax;
1954 
1955   if (!A->structure_only) {
1956     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1957   }
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 /*
1962    This function returns an array of flags which indicate the locations of contiguous
1963    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1964    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1965    Assume: sizes should be long enough to hold all the values.
1966 */
1967 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1968 {
1969   PetscInt  i,j,k,row;
1970   PetscBool flg;
1971 
1972   PetscFunctionBegin;
1973   for (i=0,j=0; i<n; j++) {
1974     row = idx[i];
1975     if (row%bs!=0) { /* Not the begining of a block */
1976       sizes[j] = 1;
1977       i++;
1978     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1979       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1980       i++;
1981     } else { /* Begining of the block, so check if the complete block exists */
1982       flg = PETSC_TRUE;
1983       for (k=1; k<bs; k++) {
1984         if (row+k != idx[i+k]) { /* break in the block */
1985           flg = PETSC_FALSE;
1986           break;
1987         }
1988       }
1989       if (flg) { /* No break in the bs */
1990         sizes[j] = bs;
1991         i       += bs;
1992       } else {
1993         sizes[j] = 1;
1994         i++;
1995       }
1996     }
1997   }
1998   *bs_max = j;
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2003 {
2004   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2005   PetscErrorCode    ierr;
2006   PetscInt          i,j,k,count,*rows;
2007   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2008   PetscScalar       zero = 0.0;
2009   MatScalar         *aa;
2010   const PetscScalar *xx;
2011   PetscScalar       *bb;
2012 
2013   PetscFunctionBegin;
2014   /* fix right hand side if needed */
2015   if (x && b) {
2016     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2017     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2018     for (i=0; i<is_n; i++) {
2019       bb[is_idx[i]] = diag*xx[is_idx[i]];
2020     }
2021     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2022     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2023   }
2024 
2025   /* Make a copy of the IS and  sort it */
2026   /* allocate memory for rows,sizes */
2027   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2028 
2029   /* copy IS values to rows, and sort them */
2030   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2031   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2032 
2033   if (baij->keepnonzeropattern) {
2034     for (i=0; i<is_n; i++) sizes[i] = 1;
2035     bs_max          = is_n;
2036   } else {
2037     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2038     A->nonzerostate++;
2039   }
2040 
2041   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2042     row = rows[j];
2043     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2044     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2045     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2046     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2047       if (diag != (PetscScalar)0.0) {
2048         if (baij->ilen[row/bs] > 0) {
2049           baij->ilen[row/bs]       = 1;
2050           baij->j[baij->i[row/bs]] = row/bs;
2051 
2052           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2053         }
2054         /* Now insert all the diagonal values for this bs */
2055         for (k=0; k<bs; k++) {
2056           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2057         }
2058       } else { /* (diag == 0.0) */
2059         baij->ilen[row/bs] = 0;
2060       } /* end (diag == 0.0) */
2061     } else { /* (sizes[i] != bs) */
2062 #if defined(PETSC_USE_DEBUG)
2063       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2064 #endif
2065       for (k=0; k<count; k++) {
2066         aa[0] =  zero;
2067         aa   += bs;
2068       }
2069       if (diag != (PetscScalar)0.0) {
2070         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2071       }
2072     }
2073   }
2074 
2075   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2076   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2077   PetscFunctionReturn(0);
2078 }
2079 
2080 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2081 {
2082   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2083   PetscErrorCode    ierr;
2084   PetscInt          i,j,k,count;
2085   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2086   PetscScalar       zero = 0.0;
2087   MatScalar         *aa;
2088   const PetscScalar *xx;
2089   PetscScalar       *bb;
2090   PetscBool         *zeroed,vecs = PETSC_FALSE;
2091 
2092   PetscFunctionBegin;
2093   /* fix right hand side if needed */
2094   if (x && b) {
2095     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2096     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2097     vecs = PETSC_TRUE;
2098   }
2099 
2100   /* zero the columns */
2101   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2102   for (i=0; i<is_n; i++) {
2103     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2104     zeroed[is_idx[i]] = PETSC_TRUE;
2105   }
2106   for (i=0; i<A->rmap->N; i++) {
2107     if (!zeroed[i]) {
2108       row = i/bs;
2109       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2110         for (k=0; k<bs; k++) {
2111           col = bs*baij->j[j] + k;
2112           if (zeroed[col]) {
2113             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2114             if (vecs) bb[i] -= aa[0]*xx[col];
2115             aa[0] = 0.0;
2116           }
2117         }
2118       }
2119     } else if (vecs) bb[i] = diag*xx[i];
2120   }
2121   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2122   if (vecs) {
2123     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2124     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2125   }
2126 
2127   /* zero the rows */
2128   for (i=0; i<is_n; i++) {
2129     row   = is_idx[i];
2130     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2131     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2132     for (k=0; k<count; k++) {
2133       aa[0] =  zero;
2134       aa   += bs;
2135     }
2136     if (diag != (PetscScalar)0.0) {
2137       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2138     }
2139   }
2140   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2141   PetscFunctionReturn(0);
2142 }
2143 
2144 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2145 {
2146   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2147   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2148   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2149   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2150   PetscErrorCode ierr;
2151   PetscInt       ridx,cidx,bs2=a->bs2;
2152   PetscBool      roworiented=a->roworiented;
2153   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2154 
2155   PetscFunctionBegin;
2156   for (k=0; k<m; k++) { /* loop over added rows */
2157     row  = im[k];
2158     brow = row/bs;
2159     if (row < 0) continue;
2160 #if defined(PETSC_USE_DEBUG)
2161     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2162 #endif
2163     rp   = aj + ai[brow];
2164     if (!A->structure_only) ap = aa + bs2*ai[brow];
2165     rmax = imax[brow];
2166     nrow = ailen[brow];
2167     low  = 0;
2168     high = nrow;
2169     for (l=0; l<n; l++) { /* loop over added columns */
2170       if (in[l] < 0) continue;
2171 #if defined(PETSC_USE_DEBUG)
2172       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2173 #endif
2174       col  = in[l]; bcol = col/bs;
2175       ridx = row % bs; cidx = col % bs;
2176       if (!A->structure_only) {
2177         if (roworiented) {
2178           value = v[l + k*n];
2179         } else {
2180           value = v[k + l*m];
2181         }
2182       }
2183       if (col <= lastcol) low = 0; else high = nrow;
2184       lastcol = col;
2185       while (high-low > 7) {
2186         t = (low+high)/2;
2187         if (rp[t] > bcol) high = t;
2188         else              low  = t;
2189       }
2190       for (i=low; i<high; i++) {
2191         if (rp[i] > bcol) break;
2192         if (rp[i] == bcol) {
2193           bap = ap +  bs2*i + bs*cidx + ridx;
2194           if (!A->structure_only) {
2195             if (is == ADD_VALUES) *bap += value;
2196             else                  *bap  = value;
2197           }
2198           goto noinsert1;
2199         }
2200       }
2201       if (nonew == 1) goto noinsert1;
2202       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2203       if (A->structure_only) {
2204         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2205       } else {
2206         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2207       }
2208       N = nrow++ - 1; high++;
2209       /* shift up all the later entries in this row */
2210       for (ii=N; ii>=i; ii--) {
2211         rp[ii+1] = rp[ii];
2212         if (!A->structure_only) {
2213           ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2214         }
2215       }
2216       if (N>=i && !A->structure_only) {
2217         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2218       }
2219       rp[i]                      = bcol;
2220       if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value;
2221       a->nz++;
2222       A->nonzerostate++;
2223 noinsert1:;
2224       low = i;
2225     }
2226     ailen[brow] = nrow;
2227   }
2228   PetscFunctionReturn(0);
2229 }
2230 
2231 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2232 {
2233   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2234   Mat            outA;
2235   PetscErrorCode ierr;
2236   PetscBool      row_identity,col_identity;
2237 
2238   PetscFunctionBegin;
2239   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2240   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2241   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2242   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2243 
2244   outA            = inA;
2245   inA->factortype = MAT_FACTOR_LU;
2246   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2247   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2248 
2249   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2250 
2251   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2252   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2253   a->row = row;
2254   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2255   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2256   a->col = col;
2257 
2258   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2259   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2260   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2261   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2262 
2263   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2264   if (!a->solve_work) {
2265     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2266     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2267   }
2268   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2269   PetscFunctionReturn(0);
2270 }
2271 
2272 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2273 {
2274   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2275   PetscInt    i,nz,mbs;
2276 
2277   PetscFunctionBegin;
2278   nz  = baij->maxnz;
2279   mbs = baij->mbs;
2280   for (i=0; i<nz; i++) {
2281     baij->j[i] = indices[i];
2282   }
2283   baij->nz = nz;
2284   for (i=0; i<mbs; i++) {
2285     baij->ilen[i] = baij->imax[i];
2286   }
2287   PetscFunctionReturn(0);
2288 }
2289 
2290 /*@
2291     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2292        in the matrix.
2293 
2294   Input Parameters:
2295 +  mat - the SeqBAIJ matrix
2296 -  indices - the column indices
2297 
2298   Level: advanced
2299 
2300   Notes:
2301     This can be called if you have precomputed the nonzero structure of the
2302   matrix and want to provide it to the matrix object to improve the performance
2303   of the MatSetValues() operation.
2304 
2305     You MUST have set the correct numbers of nonzeros per row in the call to
2306   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2307 
2308     MUST be called before any calls to MatSetValues();
2309 
2310 @*/
2311 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2312 {
2313   PetscErrorCode ierr;
2314 
2315   PetscFunctionBegin;
2316   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2317   PetscValidPointer(indices,2);
2318   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2323 {
2324   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2325   PetscErrorCode ierr;
2326   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2327   PetscReal      atmp;
2328   PetscScalar    *x,zero = 0.0;
2329   MatScalar      *aa;
2330   PetscInt       ncols,brow,krow,kcol;
2331 
2332   PetscFunctionBegin;
2333   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2334   bs  = A->rmap->bs;
2335   aa  = a->a;
2336   ai  = a->i;
2337   aj  = a->j;
2338   mbs = a->mbs;
2339 
2340   ierr = VecSet(v,zero);CHKERRQ(ierr);
2341   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2342   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2343   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2344   for (i=0; i<mbs; i++) {
2345     ncols = ai[1] - ai[0]; ai++;
2346     brow  = bs*i;
2347     for (j=0; j<ncols; j++) {
2348       for (kcol=0; kcol<bs; kcol++) {
2349         for (krow=0; krow<bs; krow++) {
2350           atmp = PetscAbsScalar(*aa);aa++;
2351           row  = brow + krow;   /* row index */
2352           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2353         }
2354       }
2355       aj++;
2356     }
2357   }
2358   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2359   PetscFunctionReturn(0);
2360 }
2361 
2362 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2363 {
2364   PetscErrorCode ierr;
2365 
2366   PetscFunctionBegin;
2367   /* If the two matrices have the same copy implementation, use fast copy. */
2368   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2369     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2370     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2371     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2372 
2373     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2374     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2375     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2376     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2377   } else {
2378     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2379   }
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2384 {
2385   PetscErrorCode ierr;
2386 
2387   PetscFunctionBegin;
2388   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2389   PetscFunctionReturn(0);
2390 }
2391 
2392 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2393 {
2394   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2395 
2396   PetscFunctionBegin;
2397   *array = a->a;
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2402 {
2403   PetscFunctionBegin;
2404   PetscFunctionReturn(0);
2405 }
2406 
2407 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2408 {
2409   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2410   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2411   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2412   PetscErrorCode ierr;
2413 
2414   PetscFunctionBegin;
2415   /* Set the number of nonzeros in the new matrix */
2416   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2421 {
2422   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2423   PetscErrorCode ierr;
2424   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2425   PetscBLASInt   one=1;
2426 
2427   PetscFunctionBegin;
2428   if (str == SAME_NONZERO_PATTERN) {
2429     PetscScalar  alpha = a;
2430     PetscBLASInt bnz;
2431     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2432     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2433     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2434   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2435     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2436   } else {
2437     Mat      B;
2438     PetscInt *nnz;
2439     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2440     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2441     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2442     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2443     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2444     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2445     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2446     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2447     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2448     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2449     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2450     ierr = PetscFree(nnz);CHKERRQ(ierr);
2451   }
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2456 {
2457   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2458   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2459   MatScalar   *aa = a->a;
2460 
2461   PetscFunctionBegin;
2462   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2463   PetscFunctionReturn(0);
2464 }
2465 
2466 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2467 {
2468   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2469   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2470   MatScalar   *aa = a->a;
2471 
2472   PetscFunctionBegin;
2473   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2474   PetscFunctionReturn(0);
2475 }
2476 
2477 /*
2478     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2479 */
2480 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2481 {
2482   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2483   PetscErrorCode ierr;
2484   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2485   PetscInt       nz = a->i[m],row,*jj,mr,col;
2486 
2487   PetscFunctionBegin;
2488   *nn = n;
2489   if (!ia) PetscFunctionReturn(0);
2490   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2491   else {
2492     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2493     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2494     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2495     jj   = a->j;
2496     for (i=0; i<nz; i++) {
2497       collengths[jj[i]]++;
2498     }
2499     cia[0] = oshift;
2500     for (i=0; i<n; i++) {
2501       cia[i+1] = cia[i] + collengths[i];
2502     }
2503     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2504     jj   = a->j;
2505     for (row=0; row<m; row++) {
2506       mr = a->i[row+1] - a->i[row];
2507       for (i=0; i<mr; i++) {
2508         col = *jj++;
2509 
2510         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2511       }
2512     }
2513     ierr = PetscFree(collengths);CHKERRQ(ierr);
2514     *ia  = cia; *ja = cja;
2515   }
2516   PetscFunctionReturn(0);
2517 }
2518 
2519 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2520 {
2521   PetscErrorCode ierr;
2522 
2523   PetscFunctionBegin;
2524   if (!ia) PetscFunctionReturn(0);
2525   ierr = PetscFree(*ia);CHKERRQ(ierr);
2526   ierr = PetscFree(*ja);CHKERRQ(ierr);
2527   PetscFunctionReturn(0);
2528 }
2529 
2530 /*
2531  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2532  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2533  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2534  */
2535 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2536 {
2537   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2538   PetscErrorCode ierr;
2539   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2540   PetscInt       nz = a->i[m],row,*jj,mr,col;
2541   PetscInt       *cspidx;
2542 
2543   PetscFunctionBegin;
2544   *nn = n;
2545   if (!ia) PetscFunctionReturn(0);
2546 
2547   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2548   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2549   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2550   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2551   jj   = a->j;
2552   for (i=0; i<nz; i++) {
2553     collengths[jj[i]]++;
2554   }
2555   cia[0] = oshift;
2556   for (i=0; i<n; i++) {
2557     cia[i+1] = cia[i] + collengths[i];
2558   }
2559   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2560   jj   = a->j;
2561   for (row=0; row<m; row++) {
2562     mr = a->i[row+1] - a->i[row];
2563     for (i=0; i<mr; i++) {
2564       col = *jj++;
2565       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2566       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2567     }
2568   }
2569   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2570   *ia    = cia; *ja = cja;
2571   *spidx = cspidx;
2572   PetscFunctionReturn(0);
2573 }
2574 
2575 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2576 {
2577   PetscErrorCode ierr;
2578 
2579   PetscFunctionBegin;
2580   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2581   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2582   PetscFunctionReturn(0);
2583 }
2584 
2585 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2586 {
2587   PetscErrorCode ierr;
2588   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2589 
2590   PetscFunctionBegin;
2591   if (!Y->preallocated || !aij->nz) {
2592     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2593   }
2594   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2595   PetscFunctionReturn(0);
2596 }
2597 
2598 /* -------------------------------------------------------------------*/
2599 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2600                                        MatGetRow_SeqBAIJ,
2601                                        MatRestoreRow_SeqBAIJ,
2602                                        MatMult_SeqBAIJ_N,
2603                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2604                                        MatMultTranspose_SeqBAIJ,
2605                                        MatMultTransposeAdd_SeqBAIJ,
2606                                        0,
2607                                        0,
2608                                        0,
2609                                /* 10*/ 0,
2610                                        MatLUFactor_SeqBAIJ,
2611                                        0,
2612                                        0,
2613                                        MatTranspose_SeqBAIJ,
2614                                /* 15*/ MatGetInfo_SeqBAIJ,
2615                                        MatEqual_SeqBAIJ,
2616                                        MatGetDiagonal_SeqBAIJ,
2617                                        MatDiagonalScale_SeqBAIJ,
2618                                        MatNorm_SeqBAIJ,
2619                                /* 20*/ 0,
2620                                        MatAssemblyEnd_SeqBAIJ,
2621                                        MatSetOption_SeqBAIJ,
2622                                        MatZeroEntries_SeqBAIJ,
2623                                /* 24*/ MatZeroRows_SeqBAIJ,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                /* 29*/ MatSetUp_SeqBAIJ,
2629                                        0,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                /* 34*/ MatDuplicate_SeqBAIJ,
2634                                        0,
2635                                        0,
2636                                        MatILUFactor_SeqBAIJ,
2637                                        0,
2638                                /* 39*/ MatAXPY_SeqBAIJ,
2639                                        MatCreateSubMatrices_SeqBAIJ,
2640                                        MatIncreaseOverlap_SeqBAIJ,
2641                                        MatGetValues_SeqBAIJ,
2642                                        MatCopy_SeqBAIJ,
2643                                /* 44*/ 0,
2644                                        MatScale_SeqBAIJ,
2645                                        MatShift_SeqBAIJ,
2646                                        0,
2647                                        MatZeroRowsColumns_SeqBAIJ,
2648                                /* 49*/ 0,
2649                                        MatGetRowIJ_SeqBAIJ,
2650                                        MatRestoreRowIJ_SeqBAIJ,
2651                                        MatGetColumnIJ_SeqBAIJ,
2652                                        MatRestoreColumnIJ_SeqBAIJ,
2653                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2654                                        0,
2655                                        0,
2656                                        0,
2657                                        MatSetValuesBlocked_SeqBAIJ,
2658                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2659                                        MatDestroy_SeqBAIJ,
2660                                        MatView_SeqBAIJ,
2661                                        0,
2662                                        0,
2663                                /* 64*/ 0,
2664                                        0,
2665                                        0,
2666                                        0,
2667                                        0,
2668                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2669                                        0,
2670                                        MatConvert_Basic,
2671                                        0,
2672                                        0,
2673                                /* 74*/ 0,
2674                                        MatFDColoringApply_BAIJ,
2675                                        0,
2676                                        0,
2677                                        0,
2678                                /* 79*/ 0,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                        MatLoad_SeqBAIJ,
2683                                /* 84*/ 0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                        0,
2688                                /* 89*/ 0,
2689                                        0,
2690                                        0,
2691                                        0,
2692                                        0,
2693                                /* 94*/ 0,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                /* 99*/ 0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                        0,
2703                                /*104*/ 0,
2704                                        MatRealPart_SeqBAIJ,
2705                                        MatImaginaryPart_SeqBAIJ,
2706                                        0,
2707                                        0,
2708                                /*109*/ 0,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                        MatMissingDiagonal_SeqBAIJ,
2713                                /*114*/ 0,
2714                                        0,
2715                                        0,
2716                                        0,
2717                                        0,
2718                                /*119*/ 0,
2719                                        0,
2720                                        MatMultHermitianTranspose_SeqBAIJ,
2721                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2722                                        0,
2723                                /*124*/ 0,
2724                                        0,
2725                                        MatInvertBlockDiagonal_SeqBAIJ,
2726                                        0,
2727                                        0,
2728                                /*129*/ 0,
2729                                        0,
2730                                        0,
2731                                        0,
2732                                        0,
2733                                /*134*/ 0,
2734                                        0,
2735                                        0,
2736                                        0,
2737                                        0,
2738                                /*139*/ MatSetBlockSizes_Default,
2739                                        0,
2740                                        0,
2741                                        MatFDColoringSetUp_SeqXAIJ,
2742                                        0,
2743                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2744                                        MatDestroySubMatrices_SeqBAIJ
2745 };
2746 
2747 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2748 {
2749   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2750   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2751   PetscErrorCode ierr;
2752 
2753   PetscFunctionBegin;
2754   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2755 
2756   /* allocate space for values if not already there */
2757   if (!aij->saved_values) {
2758     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2759     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2760   }
2761 
2762   /* copy values over */
2763   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2764   PetscFunctionReturn(0);
2765 }
2766 
2767 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2768 {
2769   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2770   PetscErrorCode ierr;
2771   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2772 
2773   PetscFunctionBegin;
2774   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2775   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2776 
2777   /* copy values over */
2778   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2779   PetscFunctionReturn(0);
2780 }
2781 
2782 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2783 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2784 
2785 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2786 {
2787   Mat_SeqBAIJ    *b;
2788   PetscErrorCode ierr;
2789   PetscInt       i,mbs,nbs,bs2;
2790   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2791 
2792   PetscFunctionBegin;
2793   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2794   if (nz == MAT_SKIP_ALLOCATION) {
2795     skipallocation = PETSC_TRUE;
2796     nz             = 0;
2797   }
2798 
2799   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2800   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2801   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2802   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2803 
2804   B->preallocated = PETSC_TRUE;
2805 
2806   mbs = B->rmap->n/bs;
2807   nbs = B->cmap->n/bs;
2808   bs2 = bs*bs;
2809 
2810   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2811 
2812   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2813   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2814   if (nnz) {
2815     for (i=0; i<mbs; i++) {
2816       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2817       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2818     }
2819   }
2820 
2821   b    = (Mat_SeqBAIJ*)B->data;
2822   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2823   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2824   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2825 
2826   if (!flg) {
2827     switch (bs) {
2828     case 1:
2829       B->ops->mult    = MatMult_SeqBAIJ_1;
2830       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2831       break;
2832     case 2:
2833       B->ops->mult    = MatMult_SeqBAIJ_2;
2834       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2835       break;
2836     case 3:
2837       B->ops->mult    = MatMult_SeqBAIJ_3;
2838       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2839       break;
2840     case 4:
2841       B->ops->mult    = MatMult_SeqBAIJ_4;
2842       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2843       break;
2844     case 5:
2845       B->ops->mult    = MatMult_SeqBAIJ_5;
2846       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2847       break;
2848     case 6:
2849       B->ops->mult    = MatMult_SeqBAIJ_6;
2850       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2851       break;
2852     case 7:
2853       B->ops->mult    = MatMult_SeqBAIJ_7;
2854       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2855       break;
2856     case 9:
2857 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX)
2858       B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2859       B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2860 #else
2861       B->ops->mult    = MatMult_SeqBAIJ_N;
2862       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2863 #endif
2864       break;
2865     case 11:
2866       B->ops->mult    = MatMult_SeqBAIJ_11;
2867       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2868       break;
2869     case 15:
2870       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2871       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2872       break;
2873     default:
2874       B->ops->mult    = MatMult_SeqBAIJ_N;
2875       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2876       break;
2877     }
2878   }
2879   B->ops->sor = MatSOR_SeqBAIJ;
2880   b->mbs = mbs;
2881   b->nbs = nbs;
2882   if (!skipallocation) {
2883     if (!b->imax) {
2884       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2885       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2886 
2887       b->free_imax_ilen = PETSC_TRUE;
2888     }
2889     /* b->ilen will count nonzeros in each block row so far. */
2890     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2891     if (!nnz) {
2892       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2893       else if (nz < 0) nz = 1;
2894       for (i=0; i<mbs; i++) b->imax[i] = nz;
2895       nz = nz*mbs;
2896     } else {
2897       nz = 0;
2898       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2899     }
2900 
2901     /* allocate the matrix space */
2902     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2903     if (B->structure_only) {
2904       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2905       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2906       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2907     } else {
2908       ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2909       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2910       ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2911     }
2912     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2913 
2914     if (B->structure_only) {
2915       b->singlemalloc = PETSC_FALSE;
2916       b->free_a       = PETSC_FALSE;
2917     } else {
2918       b->singlemalloc = PETSC_TRUE;
2919       b->free_a       = PETSC_TRUE;
2920     }
2921     b->free_ij = PETSC_TRUE;
2922 
2923     b->i[0] = 0;
2924     for (i=1; i<mbs+1; i++) {
2925       b->i[i] = b->i[i-1] + b->imax[i-1];
2926     }
2927 
2928   } else {
2929     b->free_a  = PETSC_FALSE;
2930     b->free_ij = PETSC_FALSE;
2931   }
2932 
2933   b->bs2              = bs2;
2934   b->mbs              = mbs;
2935   b->nz               = 0;
2936   b->maxnz            = nz;
2937   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2938   B->was_assembled    = PETSC_FALSE;
2939   B->assembled        = PETSC_FALSE;
2940   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2941   PetscFunctionReturn(0);
2942 }
2943 
2944 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2945 {
2946   PetscInt       i,m,nz,nz_max=0,*nnz;
2947   PetscScalar    *values=0;
2948   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2949   PetscErrorCode ierr;
2950 
2951   PetscFunctionBegin;
2952   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2953   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2954   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2955   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2956   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2957   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2958   m    = B->rmap->n/bs;
2959 
2960   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2961   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2962   for (i=0; i<m; i++) {
2963     nz = ii[i+1]- ii[i];
2964     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2965     nz_max = PetscMax(nz_max, nz);
2966     nnz[i] = nz;
2967   }
2968   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2969   ierr = PetscFree(nnz);CHKERRQ(ierr);
2970 
2971   values = (PetscScalar*)V;
2972   if (!values) {
2973     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2974   }
2975   for (i=0; i<m; i++) {
2976     PetscInt          ncols  = ii[i+1] - ii[i];
2977     const PetscInt    *icols = jj + ii[i];
2978     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2979     if (!roworiented) {
2980       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2981     } else {
2982       PetscInt j;
2983       for (j=0; j<ncols; j++) {
2984         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2985         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2986       }
2987     }
2988   }
2989   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2990   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2991   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2992   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2993   PetscFunctionReturn(0);
2994 }
2995 
2996 /*MC
2997    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2998    block sparse compressed row format.
2999 
3000    Options Database Keys:
3001 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3002 
3003   Level: beginner
3004 
3005 .seealso: MatCreateSeqBAIJ()
3006 M*/
3007 
3008 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3009 
3010 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3011 {
3012   PetscErrorCode ierr;
3013   PetscMPIInt    size;
3014   Mat_SeqBAIJ    *b;
3015 
3016   PetscFunctionBegin;
3017   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3018   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3019 
3020   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3021   B->data = (void*)b;
3022   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3023 
3024   b->row          = 0;
3025   b->col          = 0;
3026   b->icol         = 0;
3027   b->reallocs     = 0;
3028   b->saved_values = 0;
3029 
3030   b->roworiented        = PETSC_TRUE;
3031   b->nonew              = 0;
3032   b->diag               = 0;
3033   B->spptr              = 0;
3034   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3035   b->keepnonzeropattern = PETSC_FALSE;
3036 
3037   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3038   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3039   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3046 #if defined(PETSC_HAVE_HYPRE)
3047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3048 #endif
3049   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3050   PetscFunctionReturn(0);
3051 }
3052 
3053 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3054 {
3055   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3056   PetscErrorCode ierr;
3057   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3058 
3059   PetscFunctionBegin;
3060   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3061 
3062   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3063     c->imax           = a->imax;
3064     c->ilen           = a->ilen;
3065     c->free_imax_ilen = PETSC_FALSE;
3066   } else {
3067     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3068     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3069     for (i=0; i<mbs; i++) {
3070       c->imax[i] = a->imax[i];
3071       c->ilen[i] = a->ilen[i];
3072     }
3073     c->free_imax_ilen = PETSC_TRUE;
3074   }
3075 
3076   /* allocate the matrix space */
3077   if (mallocmatspace) {
3078     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3079       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3080       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3081 
3082       c->i            = a->i;
3083       c->j            = a->j;
3084       c->singlemalloc = PETSC_FALSE;
3085       c->free_a       = PETSC_TRUE;
3086       c->free_ij      = PETSC_FALSE;
3087       c->parent       = A;
3088       C->preallocated = PETSC_TRUE;
3089       C->assembled    = PETSC_TRUE;
3090 
3091       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3092       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3093       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3094     } else {
3095       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3096       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3097 
3098       c->singlemalloc = PETSC_TRUE;
3099       c->free_a       = PETSC_TRUE;
3100       c->free_ij      = PETSC_TRUE;
3101 
3102       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3103       if (mbs > 0) {
3104         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3105         if (cpvalues == MAT_COPY_VALUES) {
3106           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3107         } else {
3108           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3109         }
3110       }
3111       C->preallocated = PETSC_TRUE;
3112       C->assembled    = PETSC_TRUE;
3113     }
3114   }
3115 
3116   c->roworiented = a->roworiented;
3117   c->nonew       = a->nonew;
3118 
3119   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3120   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3121 
3122   c->bs2         = a->bs2;
3123   c->mbs         = a->mbs;
3124   c->nbs         = a->nbs;
3125 
3126   if (a->diag) {
3127     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3128       c->diag      = a->diag;
3129       c->free_diag = PETSC_FALSE;
3130     } else {
3131       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3132       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3133       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3134       c->free_diag = PETSC_TRUE;
3135     }
3136   } else c->diag = 0;
3137 
3138   c->nz         = a->nz;
3139   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3140   c->solve_work = NULL;
3141   c->mult_work  = NULL;
3142   c->sor_workt  = NULL;
3143   c->sor_work   = NULL;
3144 
3145   c->compressedrow.use   = a->compressedrow.use;
3146   c->compressedrow.nrows = a->compressedrow.nrows;
3147   if (a->compressedrow.use) {
3148     i    = a->compressedrow.nrows;
3149     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3150     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3151     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3152     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3153   } else {
3154     c->compressedrow.use    = PETSC_FALSE;
3155     c->compressedrow.i      = NULL;
3156     c->compressedrow.rindex = NULL;
3157   }
3158   C->nonzerostate = A->nonzerostate;
3159 
3160   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3161   PetscFunctionReturn(0);
3162 }
3163 
3164 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3165 {
3166   PetscErrorCode ierr;
3167 
3168   PetscFunctionBegin;
3169   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3170   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3171   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3172   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3173   PetscFunctionReturn(0);
3174 }
3175 
3176 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3177 {
3178   Mat_SeqBAIJ    *a;
3179   PetscErrorCode ierr;
3180   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3181   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3182   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3183   PetscInt       *masked,nmask,tmp,bs2,ishift;
3184   PetscMPIInt    size;
3185   int            fd;
3186   PetscScalar    *aa;
3187   MPI_Comm       comm;
3188 
3189   PetscFunctionBegin;
3190   /* force binary viewer to load .info file if it has not yet done so */
3191   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3192   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3193   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3194   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3195   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3196   if (bs < 0) bs = 1;
3197   bs2  = bs*bs;
3198 
3199   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3200   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3201   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3202   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3203   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3204   M = header[1]; N = header[2]; nz = header[3];
3205 
3206   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3207   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3208 
3209   /*
3210      This code adds extra rows to make sure the number of rows is
3211     divisible by the blocksize
3212   */
3213   mbs        = M/bs;
3214   extra_rows = bs - M + bs*(mbs);
3215   if (extra_rows == bs) extra_rows = 0;
3216   else mbs++;
3217   if (extra_rows) {
3218     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3219   }
3220 
3221   /* Set global sizes if not already set */
3222   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3223     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3224   } else { /* Check if the matrix global sizes are correct */
3225     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3226     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3227       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3228     }
3229     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3230   }
3231 
3232   /* read in row lengths */
3233   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3234   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3235   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3236 
3237   /* read in column indices */
3238   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3239   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3240   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3241 
3242   /* loop over row lengths determining block row lengths */
3243   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3244   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3245   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3246   rowcount = 0;
3247   nzcount  = 0;
3248   for (i=0; i<mbs; i++) {
3249     nmask = 0;
3250     for (j=0; j<bs; j++) {
3251       kmax = rowlengths[rowcount];
3252       for (k=0; k<kmax; k++) {
3253         tmp = jj[nzcount++]/bs;
3254         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3255       }
3256       rowcount++;
3257     }
3258     browlengths[i] += nmask;
3259     /* zero out the mask elements we set */
3260     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3261   }
3262 
3263   /* Do preallocation  */
3264   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3265   a    = (Mat_SeqBAIJ*)newmat->data;
3266 
3267   /* set matrix "i" values */
3268   a->i[0] = 0;
3269   for (i=1; i<= mbs; i++) {
3270     a->i[i]      = a->i[i-1] + browlengths[i-1];
3271     a->ilen[i-1] = browlengths[i-1];
3272   }
3273   a->nz = 0;
3274   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3275 
3276   /* read in nonzero values */
3277   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3278   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3279   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3280 
3281   /* set "a" and "j" values into matrix */
3282   nzcount = 0; jcount = 0;
3283   for (i=0; i<mbs; i++) {
3284     nzcountb = nzcount;
3285     nmask    = 0;
3286     for (j=0; j<bs; j++) {
3287       kmax = rowlengths[i*bs+j];
3288       for (k=0; k<kmax; k++) {
3289         tmp = jj[nzcount++]/bs;
3290         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3291       }
3292     }
3293     /* sort the masked values */
3294     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3295 
3296     /* set "j" values into matrix */
3297     maskcount = 1;
3298     for (j=0; j<nmask; j++) {
3299       a->j[jcount++]  = masked[j];
3300       mask[masked[j]] = maskcount++;
3301     }
3302     /* set "a" values into matrix */
3303     ishift = bs2*a->i[i];
3304     for (j=0; j<bs; j++) {
3305       kmax = rowlengths[i*bs+j];
3306       for (k=0; k<kmax; k++) {
3307         tmp       = jj[nzcountb]/bs;
3308         block     = mask[tmp] - 1;
3309         point     = jj[nzcountb] - bs*tmp;
3310         idx       = ishift + bs2*block + j + bs*point;
3311         a->a[idx] = (MatScalar)aa[nzcountb++];
3312       }
3313     }
3314     /* zero out the mask elements we set */
3315     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3316   }
3317   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3318 
3319   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3320   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3321   ierr = PetscFree(aa);CHKERRQ(ierr);
3322   ierr = PetscFree(jj);CHKERRQ(ierr);
3323   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3324 
3325   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3326   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3327   PetscFunctionReturn(0);
3328 }
3329 
3330 /*@C
3331    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3332    compressed row) format.  For good matrix assembly performance the
3333    user should preallocate the matrix storage by setting the parameter nz
3334    (or the array nnz).  By setting these parameters accurately, performance
3335    during matrix assembly can be increased by more than a factor of 50.
3336 
3337    Collective on MPI_Comm
3338 
3339    Input Parameters:
3340 +  comm - MPI communicator, set to PETSC_COMM_SELF
3341 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3342           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3343 .  m - number of rows
3344 .  n - number of columns
3345 .  nz - number of nonzero blocks  per block row (same for all rows)
3346 -  nnz - array containing the number of nonzero blocks in the various block rows
3347          (possibly different for each block row) or NULL
3348 
3349    Output Parameter:
3350 .  A - the matrix
3351 
3352    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3353    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3354    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3355 
3356    Options Database Keys:
3357 .   -mat_no_unroll - uses code that does not unroll the loops in the
3358                      block calculations (much slower)
3359 .    -mat_block_size - size of the blocks to use
3360 
3361    Level: intermediate
3362 
3363    Notes:
3364    The number of rows and columns must be divisible by blocksize.
3365 
3366    If the nnz parameter is given then the nz parameter is ignored
3367 
3368    A nonzero block is any block that as 1 or more nonzeros in it
3369 
3370    The block AIJ format is fully compatible with standard Fortran 77
3371    storage.  That is, the stored row and column indices can begin at
3372    either one (as in Fortran) or zero.  See the users' manual for details.
3373 
3374    Specify the preallocated storage with either nz or nnz (not both).
3375    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3376    allocation.  See Users-Manual: ch_mat for details.
3377    matrices.
3378 
3379 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3380 @*/
3381 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3382 {
3383   PetscErrorCode ierr;
3384 
3385   PetscFunctionBegin;
3386   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3387   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3388   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3389   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3390   PetscFunctionReturn(0);
3391 }
3392 
3393 /*@C
3394    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3395    per row in the matrix. For good matrix assembly performance the
3396    user should preallocate the matrix storage by setting the parameter nz
3397    (or the array nnz).  By setting these parameters accurately, performance
3398    during matrix assembly can be increased by more than a factor of 50.
3399 
3400    Collective on MPI_Comm
3401 
3402    Input Parameters:
3403 +  B - the matrix
3404 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3405           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3406 .  nz - number of block nonzeros per block row (same for all rows)
3407 -  nnz - array containing the number of block nonzeros in the various block rows
3408          (possibly different for each block row) or NULL
3409 
3410    Options Database Keys:
3411 .   -mat_no_unroll - uses code that does not unroll the loops in the
3412                      block calculations (much slower)
3413 .   -mat_block_size - size of the blocks to use
3414 
3415    Level: intermediate
3416 
3417    Notes:
3418    If the nnz parameter is given then the nz parameter is ignored
3419 
3420    You can call MatGetInfo() to get information on how effective the preallocation was;
3421    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3422    You can also run with the option -info and look for messages with the string
3423    malloc in them to see if additional memory allocation was needed.
3424 
3425    The block AIJ format is fully compatible with standard Fortran 77
3426    storage.  That is, the stored row and column indices can begin at
3427    either one (as in Fortran) or zero.  See the users' manual for details.
3428 
3429    Specify the preallocated storage with either nz or nnz (not both).
3430    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3431    allocation.  See Users-Manual: ch_mat for details.
3432 
3433 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3434 @*/
3435 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3436 {
3437   PetscErrorCode ierr;
3438 
3439   PetscFunctionBegin;
3440   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3441   PetscValidType(B,1);
3442   PetscValidLogicalCollectiveInt(B,bs,2);
3443   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3444   PetscFunctionReturn(0);
3445 }
3446 
3447 /*@C
3448    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3449    (the default sequential PETSc format).
3450 
3451    Collective on MPI_Comm
3452 
3453    Input Parameters:
3454 +  B - the matrix
3455 .  i - the indices into j for the start of each local row (starts with zero)
3456 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3457 -  v - optional values in the matrix
3458 
3459    Level: developer
3460 
3461    Notes:
3462    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3463    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3464    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3465    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3466    block column and the second index is over columns within a block.
3467 
3468 .keywords: matrix, aij, compressed row, sparse
3469 
3470 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3471 @*/
3472 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3473 {
3474   PetscErrorCode ierr;
3475 
3476   PetscFunctionBegin;
3477   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3478   PetscValidType(B,1);
3479   PetscValidLogicalCollectiveInt(B,bs,2);
3480   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3481   PetscFunctionReturn(0);
3482 }
3483 
3484 
3485 /*@
3486      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3487 
3488      Collective on MPI_Comm
3489 
3490    Input Parameters:
3491 +  comm - must be an MPI communicator of size 1
3492 .  bs - size of block
3493 .  m - number of rows
3494 .  n - number of columns
3495 .  i - row indices
3496 .  j - column indices
3497 -  a - matrix values
3498 
3499    Output Parameter:
3500 .  mat - the matrix
3501 
3502    Level: advanced
3503 
3504    Notes:
3505        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3506     once the matrix is destroyed
3507 
3508        You cannot set new nonzero locations into this matrix, that will generate an error.
3509 
3510        The i and j indices are 0 based
3511 
3512        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3513 
3514       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3515       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3516       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3517       with column-major ordering within blocks.
3518 
3519 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3520 
3521 @*/
3522 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3523 {
3524   PetscErrorCode ierr;
3525   PetscInt       ii;
3526   Mat_SeqBAIJ    *baij;
3527 
3528   PetscFunctionBegin;
3529   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3530   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3531 
3532   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3533   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3534   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3535   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3536   baij = (Mat_SeqBAIJ*)(*mat)->data;
3537   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3538   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3539 
3540   baij->i = i;
3541   baij->j = j;
3542   baij->a = a;
3543 
3544   baij->singlemalloc = PETSC_FALSE;
3545   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3546   baij->free_a       = PETSC_FALSE;
3547   baij->free_ij      = PETSC_FALSE;
3548 
3549   for (ii=0; ii<m; ii++) {
3550     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3551 #if defined(PETSC_USE_DEBUG)
3552     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3553 #endif
3554   }
3555 #if defined(PETSC_USE_DEBUG)
3556   for (ii=0; ii<baij->i[m]; ii++) {
3557     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3558     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3559   }
3560 #endif
3561 
3562   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3563   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3564   PetscFunctionReturn(0);
3565 }
3566 
3567 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3568 {
3569   PetscErrorCode ierr;
3570   PetscMPIInt    size;
3571 
3572   PetscFunctionBegin;
3573   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3574   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3575     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3576   } else {
3577     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3578   }
3579   PetscFunctionReturn(0);
3580 }
3581