xref: /petsc/src/mat/impls/baij/seq/baij.c (revision ccb4e88a40f0b86eaeca07ff64c64e4de2fae686)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 
20 PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A,PetscInt type,PetscReal *reductions)
21 {
22   PetscErrorCode ierr;
23   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) A->data;
24   PetscInt       m,n,i;
25   PetscInt       ib,jb,bs = A->rmap->bs;
26   MatScalar      *a_val = a_aij->a;
27 
28   PetscFunctionBegin;
29   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
30   for (i=0; i<n; i++) reductions[i] = 0.0;
31   if (type == NORM_2) {
32     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
33       for (jb=0; jb<bs; jb++) {
34         for (ib=0; ib<bs; ib++) {
35           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
36           a_val++;
37         }
38       }
39     }
40   } else if (type == NORM_1) {
41     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
42       for (jb=0; jb<bs; jb++) {
43         for (ib=0; ib<bs; ib++) {
44           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
45           a_val++;
46         }
47       }
48     }
49   } else if (type == NORM_INFINITY) {
50     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
51       for (jb=0; jb<bs; jb++) {
52         for (ib=0; ib<bs; ib++) {
53           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
54           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
55           a_val++;
56         }
57       }
58     }
59   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
60     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
61       for (jb=0; jb<bs; jb++) {
62         for (ib=0; ib<bs; ib++) {
63           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
64           a_val++;
65         }
66       }
67     }
68   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
69     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
70       for (jb=0; jb<bs; jb++) {
71         for (ib=0; ib<bs; ib++) {
72           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
73           a_val++;
74         }
75       }
76     }
77   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
78   if (type == NORM_2) {
79     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
80   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
81     for (i=0; i<n; i++) reductions[i] /= m;
82   }
83   PetscFunctionReturn(0);
84 }
85 
86 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
87 {
88   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
89   PetscErrorCode ierr;
90   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
91   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
92   PetscReal      shift = 0.0;
93   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
94 
95   PetscFunctionBegin;
96   allowzeropivot = PetscNot(A->erroriffailure);
97 
98   if (a->idiagvalid) {
99     if (values) *values = a->idiag;
100     PetscFunctionReturn(0);
101   }
102   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
103   diag_offset = a->diag;
104   if (!a->idiag) {
105     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
106     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
107   }
108   diag  = a->idiag;
109   if (values) *values = a->idiag;
110   /* factor and invert each block */
111   switch (bs) {
112   case 1:
113     for (i=0; i<mbs; i++) {
114       odiag    = v + 1*diag_offset[i];
115       diag[0]  = odiag[0];
116 
117       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
118         if (allowzeropivot) {
119           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
120           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
121           A->factorerror_zeropivot_row   = i;
122           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
123         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
124       }
125 
126       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
127       diag    += 1;
128     }
129     break;
130   case 2:
131     for (i=0; i<mbs; i++) {
132       odiag    = v + 4*diag_offset[i];
133       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
134       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
135       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
136       diag    += 4;
137     }
138     break;
139   case 3:
140     for (i=0; i<mbs; i++) {
141       odiag    = v + 9*diag_offset[i];
142       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
143       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
144       diag[8]  = odiag[8];
145       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
146       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
147       diag    += 9;
148     }
149     break;
150   case 4:
151     for (i=0; i<mbs; i++) {
152       odiag  = v + 16*diag_offset[i];
153       ierr   = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr);
154       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
155       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
156       diag  += 16;
157     }
158     break;
159   case 5:
160     for (i=0; i<mbs; i++) {
161       odiag  = v + 25*diag_offset[i];
162       ierr   = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr);
163       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
164       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
165       diag  += 25;
166     }
167     break;
168   case 6:
169     for (i=0; i<mbs; i++) {
170       odiag  = v + 36*diag_offset[i];
171       ierr   = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr);
172       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
173       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
174       diag  += 36;
175     }
176     break;
177   case 7:
178     for (i=0; i<mbs; i++) {
179       odiag  = v + 49*diag_offset[i];
180       ierr   = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr);
181       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
182       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
183       diag  += 49;
184     }
185     break;
186   default:
187     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
188     for (i=0; i<mbs; i++) {
189       odiag  = v + bs2*diag_offset[i];
190       ierr   = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr);
191       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
192       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
193       diag  += bs2;
194     }
195     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
196   }
197   a->idiagvalid = PETSC_TRUE;
198   PetscFunctionReturn(0);
199 }
200 
201 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
202 {
203   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
204   PetscScalar       *x,*work,*w,*workt,*t;
205   const MatScalar   *v,*aa = a->a, *idiag;
206   const PetscScalar *b,*xb;
207   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
208   PetscErrorCode    ierr;
209   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
210   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
211 
212   PetscFunctionBegin;
213   its = its*lits;
214   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
215   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
216   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
217   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor");
218   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
219 
220   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
221 
222   if (!m) PetscFunctionReturn(0);
223   diag  = a->diag;
224   idiag = a->idiag;
225   k    = PetscMax(A->rmap->n,A->cmap->n);
226   if (!a->mult_work) {
227     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
228   }
229   if (!a->sor_workt) {
230     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
231   }
232   if (!a->sor_work) {
233     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
234   }
235   work = a->mult_work;
236   t    = a->sor_workt;
237   w    = a->sor_work;
238 
239   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
240   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
241 
242   if (flag & SOR_ZERO_INITIAL_GUESS) {
243     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
244       switch (bs) {
245       case 1:
246         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
247         t[0] = b[0];
248         i2     = 1;
249         idiag += 1;
250         for (i=1; i<m; i++) {
251           v  = aa + ai[i];
252           vi = aj + ai[i];
253           nz = diag[i] - ai[i];
254           s[0] = b[i2];
255           for (j=0; j<nz; j++) {
256             xw[0] = x[vi[j]];
257             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
258           }
259           t[i2] = s[0];
260           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
261           x[i2]  = xw[0];
262           idiag += 1;
263           i2    += 1;
264         }
265         break;
266       case 2:
267         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
268         t[0] = b[0]; t[1] = b[1];
269         i2     = 2;
270         idiag += 4;
271         for (i=1; i<m; i++) {
272           v  = aa + 4*ai[i];
273           vi = aj + ai[i];
274           nz = diag[i] - ai[i];
275           s[0] = b[i2]; s[1] = b[i2+1];
276           for (j=0; j<nz; j++) {
277             idx = 2*vi[j];
278             it  = 4*j;
279             xw[0] = x[idx]; xw[1] = x[1+idx];
280             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
281           }
282           t[i2] = s[0]; t[i2+1] = s[1];
283           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
284           x[i2]   = xw[0]; x[i2+1] = xw[1];
285           idiag  += 4;
286           i2     += 2;
287         }
288         break;
289       case 3:
290         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
291         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
292         i2     = 3;
293         idiag += 9;
294         for (i=1; i<m; i++) {
295           v  = aa + 9*ai[i];
296           vi = aj + ai[i];
297           nz = diag[i] - ai[i];
298           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
299           while (nz--) {
300             idx = 3*(*vi++);
301             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
302             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
303             v  += 9;
304           }
305           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
306           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
307           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
308           idiag  += 9;
309           i2     += 3;
310         }
311         break;
312       case 4:
313         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
314         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
315         i2     = 4;
316         idiag += 16;
317         for (i=1; i<m; i++) {
318           v  = aa + 16*ai[i];
319           vi = aj + ai[i];
320           nz = diag[i] - ai[i];
321           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
322           while (nz--) {
323             idx = 4*(*vi++);
324             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
325             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
326             v  += 16;
327           }
328           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
329           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
330           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
331           idiag  += 16;
332           i2     += 4;
333         }
334         break;
335       case 5:
336         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
337         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
338         i2     = 5;
339         idiag += 25;
340         for (i=1; i<m; i++) {
341           v  = aa + 25*ai[i];
342           vi = aj + ai[i];
343           nz = diag[i] - ai[i];
344           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
345           while (nz--) {
346             idx = 5*(*vi++);
347             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
348             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
349             v  += 25;
350           }
351           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
352           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
353           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
354           idiag  += 25;
355           i2     += 5;
356         }
357         break;
358       case 6:
359         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
360         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
361         i2     = 6;
362         idiag += 36;
363         for (i=1; i<m; i++) {
364           v  = aa + 36*ai[i];
365           vi = aj + ai[i];
366           nz = diag[i] - ai[i];
367           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
368           while (nz--) {
369             idx = 6*(*vi++);
370             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
371             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
372             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
373             v  += 36;
374           }
375           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
376           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
377           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
378           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
379           idiag  += 36;
380           i2     += 6;
381         }
382         break;
383       case 7:
384         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
385         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
386         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
387         i2     = 7;
388         idiag += 49;
389         for (i=1; i<m; i++) {
390           v  = aa + 49*ai[i];
391           vi = aj + ai[i];
392           nz = diag[i] - ai[i];
393           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
394           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
395           while (nz--) {
396             idx = 7*(*vi++);
397             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
398             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
399             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
400             v  += 49;
401           }
402           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
403           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
404           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
405           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
406           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
407           idiag  += 49;
408           i2     += 7;
409         }
410         break;
411       default:
412         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
413         ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr);
414         i2     = bs;
415         idiag += bs2;
416         for (i=1; i<m; i++) {
417           v  = aa + bs2*ai[i];
418           vi = aj + ai[i];
419           nz = diag[i] - ai[i];
420 
421           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
422           /* copy all rows of x that are needed into contiguous space */
423           workt = work;
424           for (j=0; j<nz; j++) {
425             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
426             workt += bs;
427           }
428           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
429           ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr);
430           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
431 
432           idiag += bs2;
433           i2    += bs;
434         }
435         break;
436       }
437       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
438       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
439       xb = t;
440     }
441     else xb = b;
442     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
443       idiag = a->idiag+bs2*(a->mbs-1);
444       i2 = bs * (m-1);
445       switch (bs) {
446       case 1:
447         s[0]  = xb[i2];
448         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
449         x[i2] = xw[0];
450         i2   -= 1;
451         for (i=m-2; i>=0; i--) {
452           v  = aa + (diag[i]+1);
453           vi = aj + diag[i] + 1;
454           nz = ai[i+1] - diag[i] - 1;
455           s[0] = xb[i2];
456           for (j=0; j<nz; j++) {
457             xw[0] = x[vi[j]];
458             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
459           }
460           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
461           x[i2]  = xw[0];
462           idiag -= 1;
463           i2    -= 1;
464         }
465         break;
466       case 2:
467         s[0]  = xb[i2]; s[1] = xb[i2+1];
468         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
469         x[i2] = xw[0]; x[i2+1] = xw[1];
470         i2    -= 2;
471         idiag -= 4;
472         for (i=m-2; i>=0; i--) {
473           v  = aa + 4*(diag[i] + 1);
474           vi = aj + diag[i] + 1;
475           nz = ai[i+1] - diag[i] - 1;
476           s[0] = xb[i2]; s[1] = xb[i2+1];
477           for (j=0; j<nz; j++) {
478             idx = 2*vi[j];
479             it  = 4*j;
480             xw[0] = x[idx]; xw[1] = x[1+idx];
481             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
482           }
483           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
484           x[i2]   = xw[0]; x[i2+1] = xw[1];
485           idiag  -= 4;
486           i2     -= 2;
487         }
488         break;
489       case 3:
490         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
491         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
492         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
493         i2    -= 3;
494         idiag -= 9;
495         for (i=m-2; i>=0; i--) {
496           v  = aa + 9*(diag[i]+1);
497           vi = aj + diag[i] + 1;
498           nz = ai[i+1] - diag[i] - 1;
499           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
500           while (nz--) {
501             idx = 3*(*vi++);
502             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
503             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
504             v  += 9;
505           }
506           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
507           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
508           idiag  -= 9;
509           i2     -= 3;
510         }
511         break;
512       case 4:
513         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
514         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
515         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
516         i2    -= 4;
517         idiag -= 16;
518         for (i=m-2; i>=0; i--) {
519           v  = aa + 16*(diag[i]+1);
520           vi = aj + diag[i] + 1;
521           nz = ai[i+1] - diag[i] - 1;
522           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
523           while (nz--) {
524             idx = 4*(*vi++);
525             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
526             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
527             v  += 16;
528           }
529           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
530           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
531           idiag  -= 16;
532           i2     -= 4;
533         }
534         break;
535       case 5:
536         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
537         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
538         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
539         i2    -= 5;
540         idiag -= 25;
541         for (i=m-2; i>=0; i--) {
542           v  = aa + 25*(diag[i]+1);
543           vi = aj + diag[i] + 1;
544           nz = ai[i+1] - diag[i] - 1;
545           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
546           while (nz--) {
547             idx = 5*(*vi++);
548             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
549             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
550             v  += 25;
551           }
552           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
553           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
554           idiag  -= 25;
555           i2     -= 5;
556         }
557         break;
558       case 6:
559         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
560         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
561         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
562         i2    -= 6;
563         idiag -= 36;
564         for (i=m-2; i>=0; i--) {
565           v  = aa + 36*(diag[i]+1);
566           vi = aj + diag[i] + 1;
567           nz = ai[i+1] - diag[i] - 1;
568           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
569           while (nz--) {
570             idx = 6*(*vi++);
571             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
572             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
573             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
574             v  += 36;
575           }
576           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
577           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
578           idiag  -= 36;
579           i2     -= 6;
580         }
581         break;
582       case 7:
583         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
584         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
585         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
586         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
587         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
588         i2    -= 7;
589         idiag -= 49;
590         for (i=m-2; i>=0; i--) {
591           v  = aa + 49*(diag[i]+1);
592           vi = aj + diag[i] + 1;
593           nz = ai[i+1] - diag[i] - 1;
594           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
595           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
596           while (nz--) {
597             idx = 7*(*vi++);
598             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
599             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
600             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
601             v  += 49;
602           }
603           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
604           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
605           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
606           idiag  -= 49;
607           i2     -= 7;
608         }
609         break;
610       default:
611         ierr  = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
612         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
613         i2    -= bs;
614         idiag -= bs2;
615         for (i=m-2; i>=0; i--) {
616           v  = aa + bs2*(diag[i]+1);
617           vi = aj + diag[i] + 1;
618           nz = ai[i+1] - diag[i] - 1;
619 
620           ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
621           /* copy all rows of x that are needed into contiguous space */
622           workt = work;
623           for (j=0; j<nz; j++) {
624             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
625             workt += bs;
626           }
627           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
628           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
629 
630           idiag -= bs2;
631           i2    -= bs;
632         }
633         break;
634       }
635       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
636     }
637     its--;
638   }
639   while (its--) {
640     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
641       idiag = a->idiag;
642       i2 = 0;
643       switch (bs) {
644       case 1:
645         for (i=0; i<m; i++) {
646           v  = aa + ai[i];
647           vi = aj + ai[i];
648           nz = ai[i+1] - ai[i];
649           s[0] = b[i2];
650           for (j=0; j<nz; j++) {
651             xw[0] = x[vi[j]];
652             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
653           }
654           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
655           x[i2] += xw[0];
656           idiag += 1;
657           i2    += 1;
658         }
659         break;
660       case 2:
661         for (i=0; i<m; i++) {
662           v  = aa + 4*ai[i];
663           vi = aj + ai[i];
664           nz = ai[i+1] - ai[i];
665           s[0] = b[i2]; s[1] = b[i2+1];
666           for (j=0; j<nz; j++) {
667             idx = 2*vi[j];
668             it  = 4*j;
669             xw[0] = x[idx]; xw[1] = x[1+idx];
670             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
671           }
672           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
673           x[i2]  += xw[0]; x[i2+1] += xw[1];
674           idiag  += 4;
675           i2     += 2;
676         }
677         break;
678       case 3:
679         for (i=0; i<m; i++) {
680           v  = aa + 9*ai[i];
681           vi = aj + ai[i];
682           nz = ai[i+1] - ai[i];
683           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
684           while (nz--) {
685             idx = 3*(*vi++);
686             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
687             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
688             v  += 9;
689           }
690           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
691           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
692           idiag  += 9;
693           i2     += 3;
694         }
695         break;
696       case 4:
697         for (i=0; i<m; i++) {
698           v  = aa + 16*ai[i];
699           vi = aj + ai[i];
700           nz = ai[i+1] - ai[i];
701           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
702           while (nz--) {
703             idx = 4*(*vi++);
704             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
705             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
706             v  += 16;
707           }
708           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
709           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
710           idiag  += 16;
711           i2     += 4;
712         }
713         break;
714       case 5:
715         for (i=0; i<m; i++) {
716           v  = aa + 25*ai[i];
717           vi = aj + ai[i];
718           nz = ai[i+1] - ai[i];
719           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
720           while (nz--) {
721             idx = 5*(*vi++);
722             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
723             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
724             v  += 25;
725           }
726           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
727           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
728           idiag  += 25;
729           i2     += 5;
730         }
731         break;
732       case 6:
733         for (i=0; i<m; i++) {
734           v  = aa + 36*ai[i];
735           vi = aj + ai[i];
736           nz = ai[i+1] - ai[i];
737           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
738           while (nz--) {
739             idx = 6*(*vi++);
740             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
741             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
742             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
743             v  += 36;
744           }
745           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
746           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
747           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
748           idiag  += 36;
749           i2     += 6;
750         }
751         break;
752       case 7:
753         for (i=0; i<m; i++) {
754           v  = aa + 49*ai[i];
755           vi = aj + ai[i];
756           nz = ai[i+1] - ai[i];
757           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
758           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
759           while (nz--) {
760             idx = 7*(*vi++);
761             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
762             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
763             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
764             v  += 49;
765           }
766           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
767           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
768           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
769           idiag  += 49;
770           i2     += 7;
771         }
772         break;
773       default:
774         for (i=0; i<m; i++) {
775           v  = aa + bs2*ai[i];
776           vi = aj + ai[i];
777           nz = ai[i+1] - ai[i];
778 
779           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
780           /* copy all rows of x that are needed into contiguous space */
781           workt = work;
782           for (j=0; j<nz; j++) {
783             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
784             workt += bs;
785           }
786           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
787           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
788 
789           idiag += bs2;
790           i2    += bs;
791         }
792         break;
793       }
794       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
795     }
796     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
797       idiag = a->idiag+bs2*(a->mbs-1);
798       i2 = bs * (m-1);
799       switch (bs) {
800       case 1:
801         for (i=m-1; i>=0; i--) {
802           v  = aa + ai[i];
803           vi = aj + ai[i];
804           nz = ai[i+1] - ai[i];
805           s[0] = b[i2];
806           for (j=0; j<nz; j++) {
807             xw[0] = x[vi[j]];
808             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
809           }
810           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
811           x[i2] += xw[0];
812           idiag -= 1;
813           i2    -= 1;
814         }
815         break;
816       case 2:
817         for (i=m-1; i>=0; i--) {
818           v  = aa + 4*ai[i];
819           vi = aj + ai[i];
820           nz = ai[i+1] - ai[i];
821           s[0] = b[i2]; s[1] = b[i2+1];
822           for (j=0; j<nz; j++) {
823             idx = 2*vi[j];
824             it  = 4*j;
825             xw[0] = x[idx]; xw[1] = x[1+idx];
826             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
827           }
828           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
829           x[i2]  += xw[0]; x[i2+1] += xw[1];
830           idiag  -= 4;
831           i2     -= 2;
832         }
833         break;
834       case 3:
835         for (i=m-1; i>=0; i--) {
836           v  = aa + 9*ai[i];
837           vi = aj + ai[i];
838           nz = ai[i+1] - ai[i];
839           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
840           while (nz--) {
841             idx = 3*(*vi++);
842             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
843             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
844             v  += 9;
845           }
846           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
847           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
848           idiag  -= 9;
849           i2     -= 3;
850         }
851         break;
852       case 4:
853         for (i=m-1; i>=0; i--) {
854           v  = aa + 16*ai[i];
855           vi = aj + ai[i];
856           nz = ai[i+1] - ai[i];
857           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
858           while (nz--) {
859             idx = 4*(*vi++);
860             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
861             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
862             v  += 16;
863           }
864           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
865           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
866           idiag  -= 16;
867           i2     -= 4;
868         }
869         break;
870       case 5:
871         for (i=m-1; i>=0; i--) {
872           v  = aa + 25*ai[i];
873           vi = aj + ai[i];
874           nz = ai[i+1] - ai[i];
875           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
876           while (nz--) {
877             idx = 5*(*vi++);
878             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
879             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
880             v  += 25;
881           }
882           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
883           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
884           idiag  -= 25;
885           i2     -= 5;
886         }
887         break;
888       case 6:
889         for (i=m-1; i>=0; i--) {
890           v  = aa + 36*ai[i];
891           vi = aj + ai[i];
892           nz = ai[i+1] - ai[i];
893           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
894           while (nz--) {
895             idx = 6*(*vi++);
896             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
897             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
898             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
899             v  += 36;
900           }
901           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
902           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
903           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
904           idiag  -= 36;
905           i2     -= 6;
906         }
907         break;
908       case 7:
909         for (i=m-1; i>=0; i--) {
910           v  = aa + 49*ai[i];
911           vi = aj + ai[i];
912           nz = ai[i+1] - ai[i];
913           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
914           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
915           while (nz--) {
916             idx = 7*(*vi++);
917             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
918             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
919             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
920             v  += 49;
921           }
922           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
923           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
924           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
925           idiag  -= 49;
926           i2     -= 7;
927         }
928         break;
929       default:
930         for (i=m-1; i>=0; i--) {
931           v  = aa + bs2*ai[i];
932           vi = aj + ai[i];
933           nz = ai[i+1] - ai[i];
934 
935           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
936           /* copy all rows of x that are needed into contiguous space */
937           workt = work;
938           for (j=0; j<nz; j++) {
939             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
940             workt += bs;
941           }
942           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
943           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
944 
945           idiag -= bs2;
946           i2    -= bs;
947         }
948         break;
949       }
950       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
951     }
952   }
953   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
954   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
955   PetscFunctionReturn(0);
956 }
957 
958 /*
959     Special version for direct calls from Fortran (Used in PETSc-fun3d)
960 */
961 #if defined(PETSC_HAVE_FORTRAN_CAPS)
962 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
963 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
964 #define matsetvaluesblocked4_ matsetvaluesblocked4
965 #endif
966 
967 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
968 {
969   Mat               A  = *AA;
970   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
971   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
972   PetscInt          *ai    =a->i,*ailen=a->ilen;
973   PetscInt          *aj    =a->j,stepval,lastcol = -1;
974   const PetscScalar *value = v;
975   MatScalar         *ap,*aa = a->a,*bap;
976   PetscErrorCode    ierr;
977 
978   PetscFunctionBegin;
979   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
980   stepval = (n-1)*4;
981   for (k=0; k<m; k++) { /* loop over added rows */
982     row  = im[k];
983     rp   = aj + ai[row];
984     ap   = aa + 16*ai[row];
985     nrow = ailen[row];
986     low  = 0;
987     high = nrow;
988     for (l=0; l<n; l++) { /* loop over added columns */
989       col = in[l];
990       if (col <= lastcol)  low = 0;
991       else                high = nrow;
992       lastcol = col;
993       value   = v + k*(stepval+4 + l)*4;
994       while (high-low > 7) {
995         t = (low+high)/2;
996         if (rp[t] > col) high = t;
997         else             low  = t;
998       }
999       for (i=low; i<high; i++) {
1000         if (rp[i] > col) break;
1001         if (rp[i] == col) {
1002           bap = ap +  16*i;
1003           for (ii=0; ii<4; ii++,value+=stepval) {
1004             for (jj=ii; jj<16; jj+=4) {
1005               bap[jj] += *value++;
1006             }
1007           }
1008           goto noinsert2;
1009         }
1010       }
1011       N = nrow++ - 1;
1012       high++; /* added new column index thus must search to one higher than before */
1013       /* shift up all the later entries in this row */
1014       for (ii=N; ii>=i; ii--) {
1015         rp[ii+1] = rp[ii];
1016         ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
1017       }
1018       if (N >= i) {
1019         ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1020       }
1021       rp[i] = col;
1022       bap   = ap +  16*i;
1023       for (ii=0; ii<4; ii++,value+=stepval) {
1024         for (jj=ii; jj<16; jj+=4) {
1025           bap[jj] = *value++;
1026         }
1027       }
1028       noinsert2:;
1029       low = i;
1030     }
1031     ailen[row] = nrow;
1032   }
1033   PetscFunctionReturnVoid();
1034 }
1035 
1036 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1037 #define matsetvalues4_ MATSETVALUES4
1038 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1039 #define matsetvalues4_ matsetvalues4
1040 #endif
1041 
1042 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1043 {
1044   Mat         A  = *AA;
1045   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1046   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
1047   PetscInt    *ai=a->i,*ailen=a->ilen;
1048   PetscInt    *aj=a->j,brow,bcol;
1049   PetscInt    ridx,cidx,lastcol = -1;
1050   MatScalar   *ap,value,*aa=a->a,*bap;
1051   PetscErrorCode ierr;
1052 
1053   PetscFunctionBegin;
1054   for (k=0; k<m; k++) { /* loop over added rows */
1055     row  = im[k]; brow = row/4;
1056     rp   = aj + ai[brow];
1057     ap   = aa + 16*ai[brow];
1058     nrow = ailen[brow];
1059     low  = 0;
1060     high = nrow;
1061     for (l=0; l<n; l++) { /* loop over added columns */
1062       col   = in[l]; bcol = col/4;
1063       ridx  = row % 4; cidx = col % 4;
1064       value = v[l + k*n];
1065       if (col <= lastcol)  low = 0;
1066       else                high = nrow;
1067       lastcol = col;
1068       while (high-low > 7) {
1069         t = (low+high)/2;
1070         if (rp[t] > bcol) high = t;
1071         else              low  = t;
1072       }
1073       for (i=low; i<high; i++) {
1074         if (rp[i] > bcol) break;
1075         if (rp[i] == bcol) {
1076           bap   = ap +  16*i + 4*cidx + ridx;
1077           *bap += value;
1078           goto noinsert1;
1079         }
1080       }
1081       N = nrow++ - 1;
1082       high++; /* added new column thus must search to one higher than before */
1083       /* shift up all the later entries in this row */
1084       ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
1085       ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
1086       ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1087       rp[i]                    = bcol;
1088       ap[16*i + 4*cidx + ridx] = value;
1089 noinsert1:;
1090       low = i;
1091     }
1092     ailen[brow] = nrow;
1093   }
1094   PetscFunctionReturnVoid();
1095 }
1096 
1097 /*
1098      Checks for missing diagonals
1099 */
1100 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1101 {
1102   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1103   PetscErrorCode ierr;
1104   PetscInt       *diag,*ii = a->i,i;
1105 
1106   PetscFunctionBegin;
1107   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1108   *missing = PETSC_FALSE;
1109   if (A->rmap->n > 0 && !ii) {
1110     *missing = PETSC_TRUE;
1111     if (d) *d = 0;
1112     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
1113   } else {
1114     PetscInt n;
1115     n = PetscMin(a->mbs, a->nbs);
1116     diag = a->diag;
1117     for (i=0; i<n; i++) {
1118       if (diag[i] >= ii[i+1]) {
1119         *missing = PETSC_TRUE;
1120         if (d) *d = i;
1121         ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1122         break;
1123       }
1124     }
1125   }
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1130 {
1131   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1132   PetscErrorCode ierr;
1133   PetscInt       i,j,m = a->mbs;
1134 
1135   PetscFunctionBegin;
1136   if (!a->diag) {
1137     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1138     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1139     a->free_diag = PETSC_TRUE;
1140   }
1141   for (i=0; i<m; i++) {
1142     a->diag[i] = a->i[i+1];
1143     for (j=a->i[i]; j<a->i[i+1]; j++) {
1144       if (a->j[j] == i) {
1145         a->diag[i] = j;
1146         break;
1147       }
1148     }
1149   }
1150   PetscFunctionReturn(0);
1151 }
1152 
1153 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1154 {
1155   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1156   PetscErrorCode ierr;
1157   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1158   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1159 
1160   PetscFunctionBegin;
1161   *nn = n;
1162   if (!ia) PetscFunctionReturn(0);
1163   if (symmetric) {
1164     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1165     nz   = tia[n];
1166   } else {
1167     tia = a->i; tja = a->j;
1168   }
1169 
1170   if (!blockcompressed && bs > 1) {
1171     (*nn) *= bs;
1172     /* malloc & create the natural set of indices */
1173     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1174     if (n) {
1175       (*ia)[0] = oshift;
1176       for (j=1; j<bs; j++) {
1177         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1178       }
1179     }
1180 
1181     for (i=1; i<n; i++) {
1182       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1183       for (j=1; j<bs; j++) {
1184         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1185       }
1186     }
1187     if (n) {
1188       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1189     }
1190 
1191     if (inja) {
1192       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1193       cnt = 0;
1194       for (i=0; i<n; i++) {
1195         for (j=0; j<bs; j++) {
1196           for (k=tia[i]; k<tia[i+1]; k++) {
1197             for (l=0; l<bs; l++) {
1198               (*ja)[cnt++] = bs*tja[k] + l;
1199             }
1200           }
1201         }
1202       }
1203     }
1204 
1205     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1206       ierr = PetscFree(tia);CHKERRQ(ierr);
1207       ierr = PetscFree(tja);CHKERRQ(ierr);
1208     }
1209   } else if (oshift == 1) {
1210     if (symmetric) {
1211       nz = tia[A->rmap->n/bs];
1212       /*  add 1 to i and j indices */
1213       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1214       *ia = tia;
1215       if (ja) {
1216         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1217         *ja = tja;
1218       }
1219     } else {
1220       nz = a->i[A->rmap->n/bs];
1221       /* malloc space and  add 1 to i and j indices */
1222       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1223       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1224       if (ja) {
1225         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1226         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1227       }
1228     }
1229   } else {
1230     *ia = tia;
1231     if (ja) *ja = tja;
1232   }
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1237 {
1238   PetscErrorCode ierr;
1239 
1240   PetscFunctionBegin;
1241   if (!ia) PetscFunctionReturn(0);
1242   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1243     ierr = PetscFree(*ia);CHKERRQ(ierr);
1244     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1245   }
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1250 {
1251   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1252   PetscErrorCode ierr;
1253 
1254   PetscFunctionBegin;
1255 #if defined(PETSC_USE_LOG)
1256   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1257 #endif
1258   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1259   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1260   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1261   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1262   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1263   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1264   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1265   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1266   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1267   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1268   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1269   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1270   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1271 
1272   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1273   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1274   ierr = PetscFree(A->data);CHKERRQ(ierr);
1275 
1276   ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1288 #if defined(PETSC_HAVE_HYPRE)
1289   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
1290 #endif
1291   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1296 {
1297   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1298   PetscErrorCode ierr;
1299 
1300   PetscFunctionBegin;
1301   switch (op) {
1302   case MAT_ROW_ORIENTED:
1303     a->roworiented = flg;
1304     break;
1305   case MAT_KEEP_NONZERO_PATTERN:
1306     a->keepnonzeropattern = flg;
1307     break;
1308   case MAT_NEW_NONZERO_LOCATIONS:
1309     a->nonew = (flg ? 0 : 1);
1310     break;
1311   case MAT_NEW_NONZERO_LOCATION_ERR:
1312     a->nonew = (flg ? -1 : 0);
1313     break;
1314   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1315     a->nonew = (flg ? -2 : 0);
1316     break;
1317   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1318     a->nounused = (flg ? -1 : 0);
1319     break;
1320   case MAT_FORCE_DIAGONAL_ENTRIES:
1321   case MAT_IGNORE_OFF_PROC_ENTRIES:
1322   case MAT_USE_HASH_TABLE:
1323   case MAT_SORTED_FULL:
1324     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1325     break;
1326   case MAT_SPD:
1327   case MAT_SYMMETRIC:
1328   case MAT_STRUCTURALLY_SYMMETRIC:
1329   case MAT_HERMITIAN:
1330   case MAT_SYMMETRY_ETERNAL:
1331   case MAT_SUBMAT_SINGLEIS:
1332   case MAT_STRUCTURE_ONLY:
1333     /* These options are handled directly by MatSetOption() */
1334     break;
1335   default:
1336     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1337   }
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1342 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1343 {
1344   PetscErrorCode ierr;
1345   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1346   MatScalar      *aa_i;
1347   PetscScalar    *v_i;
1348 
1349   PetscFunctionBegin;
1350   bs  = A->rmap->bs;
1351   bs2 = bs*bs;
1352   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1353 
1354   bn  = row/bs;   /* Block number */
1355   bp  = row % bs; /* Block Position */
1356   M   = ai[bn+1] - ai[bn];
1357   *nz = bs*M;
1358 
1359   if (v) {
1360     *v = NULL;
1361     if (*nz) {
1362       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1363       for (i=0; i<M; i++) { /* for each block in the block row */
1364         v_i  = *v + i*bs;
1365         aa_i = aa + bs2*(ai[bn] + i);
1366         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1367       }
1368     }
1369   }
1370 
1371   if (idx) {
1372     *idx = NULL;
1373     if (*nz) {
1374       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1375       for (i=0; i<M; i++) { /* for each block in the block row */
1376         idx_i = *idx + i*bs;
1377         itmp  = bs*aj[ai[bn] + i];
1378         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1379       }
1380     }
1381   }
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1386 {
1387   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1388   PetscErrorCode ierr;
1389 
1390   PetscFunctionBegin;
1391   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1392   PetscFunctionReturn(0);
1393 }
1394 
1395 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1396 {
1397   PetscErrorCode ierr;
1398 
1399   PetscFunctionBegin;
1400   if (nz)  *nz = 0;
1401   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1402   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1407 {
1408   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1409   Mat            C;
1410   PetscErrorCode ierr;
1411   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1412   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1413   MatScalar      *ata,*aa=a->a;
1414 
1415   PetscFunctionBegin;
1416   ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr);
1417   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1418     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1419 
1420     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1421     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1422     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1423     ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr);
1424 
1425     at  = (Mat_SeqBAIJ*)C->data;
1426     ati = at->i;
1427     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1428   } else {
1429     C = *B;
1430     at = (Mat_SeqBAIJ*)C->data;
1431     ati = at->i;
1432   }
1433 
1434   atj = at->j;
1435   ata = at->a;
1436 
1437   /* Copy ati into atfill so we have locations of the next free space in atj */
1438   ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr);
1439 
1440   /* Walk through A row-wise and mark nonzero entries of A^T. */
1441   for (i=0; i<mbs; i++) {
1442     anzj = ai[i+1] - ai[i];
1443     for (j=0; j<anzj; j++) {
1444       atj[atfill[*aj]] = i;
1445       for (kr=0; kr<bs; kr++) {
1446         for (k=0; k<bs; k++) {
1447           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1448         }
1449       }
1450       atfill[*aj++] += 1;
1451     }
1452   }
1453   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1454   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1455 
1456   /* Clean up temporary space and complete requests. */
1457   ierr = PetscFree(atfill);CHKERRQ(ierr);
1458 
1459   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1460     ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1461     *B = C;
1462   } else {
1463     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1464   }
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1469 {
1470   PetscErrorCode ierr;
1471   Mat            Btrans;
1472 
1473   PetscFunctionBegin;
1474   *f   = PETSC_FALSE;
1475   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1476   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1477   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1482 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
1483 {
1484   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)mat->data;
1485   PetscInt       header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1486   PetscInt       *rowlens,*colidxs;
1487   PetscScalar    *matvals;
1488   PetscErrorCode ierr;
1489 
1490   PetscFunctionBegin;
1491   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1492 
1493   M  = mat->rmap->N;
1494   N  = mat->cmap->N;
1495   m  = mat->rmap->n;
1496   bs = mat->rmap->bs;
1497   nz = bs*bs*A->nz;
1498 
1499   /* write matrix header */
1500   header[0] = MAT_FILE_CLASSID;
1501   header[1] = M; header[2] = N; header[3] = nz;
1502   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1503 
1504   /* store row lengths */
1505   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1506   for (cnt=0, i=0; i<A->mbs; i++)
1507     for (j=0; j<bs; j++)
1508       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1509   ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr);
1510   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1511 
1512   /* store column indices  */
1513   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1514   for (cnt=0, i=0; i<A->mbs; i++)
1515     for (k=0; k<bs; k++)
1516       for (j=A->i[i]; j<A->i[i+1]; j++)
1517         for (l=0; l<bs; l++)
1518           colidxs[cnt++] = bs*A->j[j] + l;
1519   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1520   ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr);
1521   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1522 
1523   /* store nonzero values */
1524   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1525   for (cnt=0, i=0; i<A->mbs; i++)
1526     for (k=0; k<bs; k++)
1527       for (j=A->i[i]; j<A->i[i+1]; j++)
1528         for (l=0; l<bs; l++)
1529           matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1530   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1531   ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1532   ierr = PetscFree(matvals);CHKERRQ(ierr);
1533 
1534   /* write block size option to the viewer's .info file */
1535   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1540 {
1541   PetscErrorCode ierr;
1542   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1543   PetscInt       i,bs = A->rmap->bs,k;
1544 
1545   PetscFunctionBegin;
1546   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1547   for (i=0; i<a->mbs; i++) {
1548     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1549     for (k=a->i[i]; k<a->i[i+1]; k++) {
1550       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1551     }
1552     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1553   }
1554   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1559 {
1560   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1561   PetscErrorCode    ierr;
1562   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1563   PetscViewerFormat format;
1564 
1565   PetscFunctionBegin;
1566   if (A->structure_only) {
1567     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1568     PetscFunctionReturn(0);
1569   }
1570 
1571   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1572   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1573     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1574   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1575     const char *matname;
1576     Mat        aij;
1577     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1578     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1579     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1580     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1581     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1582   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1583       PetscFunctionReturn(0);
1584   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1585     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1586     for (i=0; i<a->mbs; i++) {
1587       for (j=0; j<bs; j++) {
1588         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1589         for (k=a->i[i]; k<a->i[i+1]; k++) {
1590           for (l=0; l<bs; l++) {
1591 #if defined(PETSC_USE_COMPLEX)
1592             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1593               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1594                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1595             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1596               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1597                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1598             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1599               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1600             }
1601 #else
1602             if (a->a[bs2*k + l*bs + j] != 0.0) {
1603               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1604             }
1605 #endif
1606           }
1607         }
1608         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1609       }
1610     }
1611     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1612   } else {
1613     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1614     for (i=0; i<a->mbs; i++) {
1615       for (j=0; j<bs; j++) {
1616         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1617         for (k=a->i[i]; k<a->i[i+1]; k++) {
1618           for (l=0; l<bs; l++) {
1619 #if defined(PETSC_USE_COMPLEX)
1620             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1621               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1622                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1623             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1624               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1625                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1626             } else {
1627               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1628             }
1629 #else
1630             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1631 #endif
1632           }
1633         }
1634         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1635       }
1636     }
1637     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1638   }
1639   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1640   PetscFunctionReturn(0);
1641 }
1642 
1643 #include <petscdraw.h>
1644 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1645 {
1646   Mat               A = (Mat) Aa;
1647   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1648   PetscErrorCode    ierr;
1649   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1650   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1651   MatScalar         *aa;
1652   PetscViewer       viewer;
1653   PetscViewerFormat format;
1654 
1655   PetscFunctionBegin;
1656   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1657   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1658   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1659 
1660   /* loop over matrix elements drawing boxes */
1661 
1662   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1663     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1664     /* Blue for negative, Cyan for zero and  Red for positive */
1665     color = PETSC_DRAW_BLUE;
1666     for (i=0,row=0; i<mbs; i++,row+=bs) {
1667       for (j=a->i[i]; j<a->i[i+1]; j++) {
1668         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1669         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1670         aa  = a->a + j*bs2;
1671         for (k=0; k<bs; k++) {
1672           for (l=0; l<bs; l++) {
1673             if (PetscRealPart(*aa++) >=  0.) continue;
1674             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1675           }
1676         }
1677       }
1678     }
1679     color = PETSC_DRAW_CYAN;
1680     for (i=0,row=0; i<mbs; i++,row+=bs) {
1681       for (j=a->i[i]; j<a->i[i+1]; j++) {
1682         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1683         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1684         aa  = a->a + j*bs2;
1685         for (k=0; k<bs; k++) {
1686           for (l=0; l<bs; l++) {
1687             if (PetscRealPart(*aa++) != 0.) continue;
1688             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1689           }
1690         }
1691       }
1692     }
1693     color = PETSC_DRAW_RED;
1694     for (i=0,row=0; i<mbs; i++,row+=bs) {
1695       for (j=a->i[i]; j<a->i[i+1]; j++) {
1696         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1697         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1698         aa  = a->a + j*bs2;
1699         for (k=0; k<bs; k++) {
1700           for (l=0; l<bs; l++) {
1701             if (PetscRealPart(*aa++) <= 0.) continue;
1702             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1703           }
1704         }
1705       }
1706     }
1707     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1708   } else {
1709     /* use contour shading to indicate magnitude of values */
1710     /* first determine max of all nonzero values */
1711     PetscReal minv = 0.0, maxv = 0.0;
1712     PetscDraw popup;
1713 
1714     for (i=0; i<a->nz*a->bs2; i++) {
1715       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1716     }
1717     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1718     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1719     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1720 
1721     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1722     for (i=0,row=0; i<mbs; i++,row+=bs) {
1723       for (j=a->i[i]; j<a->i[i+1]; j++) {
1724         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1725         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1726         aa  = a->a + j*bs2;
1727         for (k=0; k<bs; k++) {
1728           for (l=0; l<bs; l++) {
1729             MatScalar v = *aa++;
1730             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1731             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1732           }
1733         }
1734       }
1735     }
1736     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1737   }
1738   PetscFunctionReturn(0);
1739 }
1740 
1741 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1742 {
1743   PetscErrorCode ierr;
1744   PetscReal      xl,yl,xr,yr,w,h;
1745   PetscDraw      draw;
1746   PetscBool      isnull;
1747 
1748   PetscFunctionBegin;
1749   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1750   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1751   if (isnull) PetscFunctionReturn(0);
1752 
1753   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1754   xr  += w;          yr += h;        xl = -w;     yl = -h;
1755   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1756   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1757   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1758   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1759   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1764 {
1765   PetscErrorCode ierr;
1766   PetscBool      iascii,isbinary,isdraw;
1767 
1768   PetscFunctionBegin;
1769   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1770   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1771   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1772   if (iascii) {
1773     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1774   } else if (isbinary) {
1775     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1776   } else if (isdraw) {
1777     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1778   } else {
1779     Mat B;
1780     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1781     ierr = MatView(B,viewer);CHKERRQ(ierr);
1782     ierr = MatDestroy(&B);CHKERRQ(ierr);
1783   }
1784   PetscFunctionReturn(0);
1785 }
1786 
1787 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1788 {
1789   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1790   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1791   PetscInt    *ai = a->i,*ailen = a->ilen;
1792   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1793   MatScalar   *ap,*aa = a->a;
1794 
1795   PetscFunctionBegin;
1796   for (k=0; k<m; k++) { /* loop over rows */
1797     row = im[k]; brow = row/bs;
1798     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1799     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1800     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1801     nrow = ailen[brow];
1802     for (l=0; l<n; l++) { /* loop over columns */
1803       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1804       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1805       col  = in[l];
1806       bcol = col/bs;
1807       cidx = col%bs;
1808       ridx = row%bs;
1809       high = nrow;
1810       low  = 0; /* assume unsorted */
1811       while (high-low > 5) {
1812         t = (low+high)/2;
1813         if (rp[t] > bcol) high = t;
1814         else             low  = t;
1815       }
1816       for (i=low; i<high; i++) {
1817         if (rp[i] > bcol) break;
1818         if (rp[i] == bcol) {
1819           *v++ = ap[bs2*i+bs*cidx+ridx];
1820           goto finished;
1821         }
1822       }
1823       *v++ = 0.0;
1824 finished:;
1825     }
1826   }
1827   PetscFunctionReturn(0);
1828 }
1829 
1830 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1831 {
1832   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1833   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1834   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1835   PetscErrorCode    ierr;
1836   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1837   PetscBool         roworiented=a->roworiented;
1838   const PetscScalar *value     = v;
1839   MatScalar         *ap=NULL,*aa = a->a,*bap;
1840 
1841   PetscFunctionBegin;
1842   if (roworiented) {
1843     stepval = (n-1)*bs;
1844   } else {
1845     stepval = (m-1)*bs;
1846   }
1847   for (k=0; k<m; k++) { /* loop over added rows */
1848     row = im[k];
1849     if (row < 0) continue;
1850     if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1851     rp   = aj + ai[row];
1852     if (!A->structure_only) ap = aa + bs2*ai[row];
1853     rmax = imax[row];
1854     nrow = ailen[row];
1855     low  = 0;
1856     high = nrow;
1857     for (l=0; l<n; l++) { /* loop over added columns */
1858       if (in[l] < 0) continue;
1859       if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1860       col = in[l];
1861       if (!A->structure_only) {
1862         if (roworiented) {
1863           value = v + (k*(stepval+bs) + l)*bs;
1864         } else {
1865           value = v + (l*(stepval+bs) + k)*bs;
1866         }
1867       }
1868       if (col <= lastcol) low = 0;
1869       else high = nrow;
1870       lastcol = col;
1871       while (high-low > 7) {
1872         t = (low+high)/2;
1873         if (rp[t] > col) high = t;
1874         else             low  = t;
1875       }
1876       for (i=low; i<high; i++) {
1877         if (rp[i] > col) break;
1878         if (rp[i] == col) {
1879           if (A->structure_only) goto noinsert2;
1880           bap = ap +  bs2*i;
1881           if (roworiented) {
1882             if (is == ADD_VALUES) {
1883               for (ii=0; ii<bs; ii++,value+=stepval) {
1884                 for (jj=ii; jj<bs2; jj+=bs) {
1885                   bap[jj] += *value++;
1886                 }
1887               }
1888             } else {
1889               for (ii=0; ii<bs; ii++,value+=stepval) {
1890                 for (jj=ii; jj<bs2; jj+=bs) {
1891                   bap[jj] = *value++;
1892                 }
1893               }
1894             }
1895           } else {
1896             if (is == ADD_VALUES) {
1897               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1898                 for (jj=0; jj<bs; jj++) {
1899                   bap[jj] += value[jj];
1900                 }
1901                 bap += bs;
1902               }
1903             } else {
1904               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1905                 for (jj=0; jj<bs; jj++) {
1906                   bap[jj]  = value[jj];
1907                 }
1908                 bap += bs;
1909               }
1910             }
1911           }
1912           goto noinsert2;
1913         }
1914       }
1915       if (nonew == 1) goto noinsert2;
1916       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1917       if (A->structure_only) {
1918         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1919       } else {
1920         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1921       }
1922       N = nrow++ - 1; high++;
1923       /* shift up all the later entries in this row */
1924       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
1925       rp[i] = col;
1926       if (!A->structure_only) {
1927         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
1928         bap   = ap +  bs2*i;
1929         if (roworiented) {
1930           for (ii=0; ii<bs; ii++,value+=stepval) {
1931             for (jj=ii; jj<bs2; jj+=bs) {
1932               bap[jj] = *value++;
1933             }
1934           }
1935         } else {
1936           for (ii=0; ii<bs; ii++,value+=stepval) {
1937             for (jj=0; jj<bs; jj++) {
1938               *bap++ = *value++;
1939             }
1940           }
1941         }
1942       }
1943 noinsert2:;
1944       low = i;
1945     }
1946     ailen[row] = nrow;
1947   }
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1952 {
1953   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1954   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1955   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1956   PetscErrorCode ierr;
1957   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1958   MatScalar      *aa  = a->a,*ap;
1959   PetscReal      ratio=0.6;
1960 
1961   PetscFunctionBegin;
1962   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1963 
1964   if (m) rmax = ailen[0];
1965   for (i=1; i<mbs; i++) {
1966     /* move each row back by the amount of empty slots (fshift) before it*/
1967     fshift += imax[i-1] - ailen[i-1];
1968     rmax    = PetscMax(rmax,ailen[i]);
1969     if (fshift) {
1970       ip = aj + ai[i];
1971       ap = aa + bs2*ai[i];
1972       N  = ailen[i];
1973       ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr);
1974       if (!A->structure_only) {
1975         ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr);
1976       }
1977     }
1978     ai[i] = ai[i-1] + ailen[i-1];
1979   }
1980   if (mbs) {
1981     fshift += imax[mbs-1] - ailen[mbs-1];
1982     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1983   }
1984 
1985   /* reset ilen and imax for each row */
1986   a->nonzerorowcnt = 0;
1987   if (A->structure_only) {
1988     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1989   } else { /* !A->structure_only */
1990     for (i=0; i<mbs; i++) {
1991       ailen[i] = imax[i] = ai[i+1] - ai[i];
1992       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1993     }
1994   }
1995   a->nz = ai[mbs];
1996 
1997   /* diagonals may have moved, so kill the diagonal pointers */
1998   a->idiagvalid = PETSC_FALSE;
1999   if (fshift && a->diag) {
2000     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
2001     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2002     a->diag = NULL;
2003   }
2004   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2005   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2006   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2007   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2008 
2009   A->info.mallocs    += a->reallocs;
2010   a->reallocs         = 0;
2011   A->info.nz_unneeded = (PetscReal)fshift*bs2;
2012   a->rmax             = rmax;
2013 
2014   if (!A->structure_only) {
2015     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2016   }
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 /*
2021    This function returns an array of flags which indicate the locations of contiguous
2022    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2023    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2024    Assume: sizes should be long enough to hold all the values.
2025 */
2026 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2027 {
2028   PetscInt  i,j,k,row;
2029   PetscBool flg;
2030 
2031   PetscFunctionBegin;
2032   for (i=0,j=0; i<n; j++) {
2033     row = idx[i];
2034     if (row%bs!=0) { /* Not the beginning of a block */
2035       sizes[j] = 1;
2036       i++;
2037     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2038       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2039       i++;
2040     } else { /* Begining of the block, so check if the complete block exists */
2041       flg = PETSC_TRUE;
2042       for (k=1; k<bs; k++) {
2043         if (row+k != idx[i+k]) { /* break in the block */
2044           flg = PETSC_FALSE;
2045           break;
2046         }
2047       }
2048       if (flg) { /* No break in the bs */
2049         sizes[j] = bs;
2050         i       += bs;
2051       } else {
2052         sizes[j] = 1;
2053         i++;
2054       }
2055     }
2056   }
2057   *bs_max = j;
2058   PetscFunctionReturn(0);
2059 }
2060 
2061 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2062 {
2063   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2064   PetscErrorCode    ierr;
2065   PetscInt          i,j,k,count,*rows;
2066   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2067   PetscScalar       zero = 0.0;
2068   MatScalar         *aa;
2069   const PetscScalar *xx;
2070   PetscScalar       *bb;
2071 
2072   PetscFunctionBegin;
2073   /* fix right hand side if needed */
2074   if (x && b) {
2075     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2076     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2077     for (i=0; i<is_n; i++) {
2078       bb[is_idx[i]] = diag*xx[is_idx[i]];
2079     }
2080     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2081     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2082   }
2083 
2084   /* Make a copy of the IS and  sort it */
2085   /* allocate memory for rows,sizes */
2086   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2087 
2088   /* copy IS values to rows, and sort them */
2089   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2090   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2091 
2092   if (baij->keepnonzeropattern) {
2093     for (i=0; i<is_n; i++) sizes[i] = 1;
2094     bs_max          = is_n;
2095   } else {
2096     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2097     A->nonzerostate++;
2098   }
2099 
2100   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2101     row = rows[j];
2102     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2103     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2104     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2105     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2106       if (diag != (PetscScalar)0.0) {
2107         if (baij->ilen[row/bs] > 0) {
2108           baij->ilen[row/bs]       = 1;
2109           baij->j[baij->i[row/bs]] = row/bs;
2110 
2111           ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr);
2112         }
2113         /* Now insert all the diagonal values for this bs */
2114         for (k=0; k<bs; k++) {
2115           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2116         }
2117       } else { /* (diag == 0.0) */
2118         baij->ilen[row/bs] = 0;
2119       } /* end (diag == 0.0) */
2120     } else { /* (sizes[i] != bs) */
2121       if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2122       for (k=0; k<count; k++) {
2123         aa[0] =  zero;
2124         aa   += bs;
2125       }
2126       if (diag != (PetscScalar)0.0) {
2127         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2128       }
2129     }
2130   }
2131 
2132   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2133   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2134   PetscFunctionReturn(0);
2135 }
2136 
2137 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2138 {
2139   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2140   PetscErrorCode    ierr;
2141   PetscInt          i,j,k,count;
2142   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2143   PetscScalar       zero = 0.0;
2144   MatScalar         *aa;
2145   const PetscScalar *xx;
2146   PetscScalar       *bb;
2147   PetscBool         *zeroed,vecs = PETSC_FALSE;
2148 
2149   PetscFunctionBegin;
2150   /* fix right hand side if needed */
2151   if (x && b) {
2152     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2153     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2154     vecs = PETSC_TRUE;
2155   }
2156 
2157   /* zero the columns */
2158   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2159   for (i=0; i<is_n; i++) {
2160     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2161     zeroed[is_idx[i]] = PETSC_TRUE;
2162   }
2163   for (i=0; i<A->rmap->N; i++) {
2164     if (!zeroed[i]) {
2165       row = i/bs;
2166       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2167         for (k=0; k<bs; k++) {
2168           col = bs*baij->j[j] + k;
2169           if (zeroed[col]) {
2170             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2171             if (vecs) bb[i] -= aa[0]*xx[col];
2172             aa[0] = 0.0;
2173           }
2174         }
2175       }
2176     } else if (vecs) bb[i] = diag*xx[i];
2177   }
2178   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2179   if (vecs) {
2180     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2181     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2182   }
2183 
2184   /* zero the rows */
2185   for (i=0; i<is_n; i++) {
2186     row   = is_idx[i];
2187     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2188     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2189     for (k=0; k<count; k++) {
2190       aa[0] =  zero;
2191       aa   += bs;
2192     }
2193     if (diag != (PetscScalar)0.0) {
2194       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2195     }
2196   }
2197   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2198   PetscFunctionReturn(0);
2199 }
2200 
2201 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2202 {
2203   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2204   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2205   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2206   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2207   PetscErrorCode ierr;
2208   PetscInt       ridx,cidx,bs2=a->bs2;
2209   PetscBool      roworiented=a->roworiented;
2210   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2211 
2212   PetscFunctionBegin;
2213   for (k=0; k<m; k++) { /* loop over added rows */
2214     row  = im[k];
2215     brow = row/bs;
2216     if (row < 0) continue;
2217     if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2218     rp   = aj + ai[brow];
2219     if (!A->structure_only) ap = aa + bs2*ai[brow];
2220     rmax = imax[brow];
2221     nrow = ailen[brow];
2222     low  = 0;
2223     high = nrow;
2224     for (l=0; l<n; l++) { /* loop over added columns */
2225       if (in[l] < 0) continue;
2226       if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2227       col  = in[l]; bcol = col/bs;
2228       ridx = row % bs; cidx = col % bs;
2229       if (!A->structure_only) {
2230         if (roworiented) {
2231           value = v[l + k*n];
2232         } else {
2233           value = v[k + l*m];
2234         }
2235       }
2236       if (col <= lastcol) low = 0; else high = nrow;
2237       lastcol = col;
2238       while (high-low > 7) {
2239         t = (low+high)/2;
2240         if (rp[t] > bcol) high = t;
2241         else              low  = t;
2242       }
2243       for (i=low; i<high; i++) {
2244         if (rp[i] > bcol) break;
2245         if (rp[i] == bcol) {
2246           bap = ap +  bs2*i + bs*cidx + ridx;
2247           if (!A->structure_only) {
2248             if (is == ADD_VALUES) *bap += value;
2249             else                  *bap  = value;
2250           }
2251           goto noinsert1;
2252         }
2253       }
2254       if (nonew == 1) goto noinsert1;
2255       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2256       if (A->structure_only) {
2257         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2258       } else {
2259         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2260       }
2261       N = nrow++ - 1; high++;
2262       /* shift up all the later entries in this row */
2263       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
2264       rp[i] = bcol;
2265       if (!A->structure_only) {
2266         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
2267         ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr);
2268         ap[bs2*i + bs*cidx + ridx] = value;
2269       }
2270       a->nz++;
2271       A->nonzerostate++;
2272 noinsert1:;
2273       low = i;
2274     }
2275     ailen[brow] = nrow;
2276   }
2277   PetscFunctionReturn(0);
2278 }
2279 
2280 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2281 {
2282   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2283   Mat            outA;
2284   PetscErrorCode ierr;
2285   PetscBool      row_identity,col_identity;
2286 
2287   PetscFunctionBegin;
2288   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2289   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2290   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2291   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2292 
2293   outA            = inA;
2294   inA->factortype = MAT_FACTOR_LU;
2295   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2296   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2297 
2298   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2299 
2300   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2301   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2302   a->row = row;
2303   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2304   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2305   a->col = col;
2306 
2307   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2308   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2309   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2310   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2311 
2312   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2313   if (!a->solve_work) {
2314     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2315     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2316   }
2317   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2322 {
2323   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2324   PetscInt    i,nz,mbs;
2325 
2326   PetscFunctionBegin;
2327   nz  = baij->maxnz;
2328   mbs = baij->mbs;
2329   for (i=0; i<nz; i++) {
2330     baij->j[i] = indices[i];
2331   }
2332   baij->nz = nz;
2333   for (i=0; i<mbs; i++) {
2334     baij->ilen[i] = baij->imax[i];
2335   }
2336   PetscFunctionReturn(0);
2337 }
2338 
2339 /*@
2340     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2341        in the matrix.
2342 
2343   Input Parameters:
2344 +  mat - the SeqBAIJ matrix
2345 -  indices - the column indices
2346 
2347   Level: advanced
2348 
2349   Notes:
2350     This can be called if you have precomputed the nonzero structure of the
2351   matrix and want to provide it to the matrix object to improve the performance
2352   of the MatSetValues() operation.
2353 
2354     You MUST have set the correct numbers of nonzeros per row in the call to
2355   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2356 
2357     MUST be called before any calls to MatSetValues();
2358 
2359 @*/
2360 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2361 {
2362   PetscErrorCode ierr;
2363 
2364   PetscFunctionBegin;
2365   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2366   PetscValidPointer(indices,2);
2367   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2368   PetscFunctionReturn(0);
2369 }
2370 
2371 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2372 {
2373   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2374   PetscErrorCode ierr;
2375   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2376   PetscReal      atmp;
2377   PetscScalar    *x,zero = 0.0;
2378   MatScalar      *aa;
2379   PetscInt       ncols,brow,krow,kcol;
2380 
2381   PetscFunctionBegin;
2382   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2383   bs  = A->rmap->bs;
2384   aa  = a->a;
2385   ai  = a->i;
2386   aj  = a->j;
2387   mbs = a->mbs;
2388 
2389   ierr = VecSet(v,zero);CHKERRQ(ierr);
2390   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2391   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2392   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2393   for (i=0; i<mbs; i++) {
2394     ncols = ai[1] - ai[0]; ai++;
2395     brow  = bs*i;
2396     for (j=0; j<ncols; j++) {
2397       for (kcol=0; kcol<bs; kcol++) {
2398         for (krow=0; krow<bs; krow++) {
2399           atmp = PetscAbsScalar(*aa);aa++;
2400           row  = brow + krow;   /* row index */
2401           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2402         }
2403       }
2404       aj++;
2405     }
2406   }
2407   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2408   PetscFunctionReturn(0);
2409 }
2410 
2411 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2412 {
2413   PetscErrorCode ierr;
2414 
2415   PetscFunctionBegin;
2416   /* If the two matrices have the same copy implementation, use fast copy. */
2417   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2418     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2419     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2420     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2421 
2422     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2423     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2424     ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr);
2425     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2426   } else {
2427     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2428   }
2429   PetscFunctionReturn(0);
2430 }
2431 
2432 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2433 {
2434   PetscErrorCode ierr;
2435 
2436   PetscFunctionBegin;
2437   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2438   PetscFunctionReturn(0);
2439 }
2440 
2441 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2442 {
2443   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2444 
2445   PetscFunctionBegin;
2446   *array = a->a;
2447   PetscFunctionReturn(0);
2448 }
2449 
2450 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2451 {
2452   PetscFunctionBegin;
2453   *array = NULL;
2454   PetscFunctionReturn(0);
2455 }
2456 
2457 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2458 {
2459   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2460   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2461   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2462   PetscErrorCode ierr;
2463 
2464   PetscFunctionBegin;
2465   /* Set the number of nonzeros in the new matrix */
2466   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2471 {
2472   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2473   PetscErrorCode ierr;
2474   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2475   PetscBLASInt   one=1;
2476 
2477   PetscFunctionBegin;
2478   if (str == SAME_NONZERO_PATTERN) {
2479     PetscScalar  alpha = a;
2480     PetscBLASInt bnz;
2481     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2482     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2483     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2484   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2485     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2486   } else {
2487     Mat      B;
2488     PetscInt *nnz;
2489     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2490     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2491     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2492     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2493     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2494     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2495     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2496     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2497     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2498     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2499     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2500     ierr = PetscFree(nnz);CHKERRQ(ierr);
2501   }
2502   PetscFunctionReturn(0);
2503 }
2504 
2505 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2506 {
2507 #if defined(PETSC_USE_COMPLEX)
2508   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2509   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2510   MatScalar   *aa = a->a;
2511 
2512   PetscFunctionBegin;
2513   for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]);
2514 #else
2515   PetscFunctionBegin;
2516 #endif
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2521 {
2522   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2523   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2524   MatScalar   *aa = a->a;
2525 
2526   PetscFunctionBegin;
2527   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2528   PetscFunctionReturn(0);
2529 }
2530 
2531 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2532 {
2533   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2534   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2535   MatScalar   *aa = a->a;
2536 
2537   PetscFunctionBegin;
2538   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2539   PetscFunctionReturn(0);
2540 }
2541 
2542 /*
2543     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2544 */
2545 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2546 {
2547   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2548   PetscErrorCode ierr;
2549   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2550   PetscInt       nz = a->i[m],row,*jj,mr,col;
2551 
2552   PetscFunctionBegin;
2553   *nn = n;
2554   if (!ia) PetscFunctionReturn(0);
2555   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2556   else {
2557     ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2558     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2559     ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2560     jj   = a->j;
2561     for (i=0; i<nz; i++) {
2562       collengths[jj[i]]++;
2563     }
2564     cia[0] = oshift;
2565     for (i=0; i<n; i++) {
2566       cia[i+1] = cia[i] + collengths[i];
2567     }
2568     ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2569     jj   = a->j;
2570     for (row=0; row<m; row++) {
2571       mr = a->i[row+1] - a->i[row];
2572       for (i=0; i<mr; i++) {
2573         col = *jj++;
2574 
2575         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2576       }
2577     }
2578     ierr = PetscFree(collengths);CHKERRQ(ierr);
2579     *ia  = cia; *ja = cja;
2580   }
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2585 {
2586   PetscErrorCode ierr;
2587 
2588   PetscFunctionBegin;
2589   if (!ia) PetscFunctionReturn(0);
2590   ierr = PetscFree(*ia);CHKERRQ(ierr);
2591   ierr = PetscFree(*ja);CHKERRQ(ierr);
2592   PetscFunctionReturn(0);
2593 }
2594 
2595 /*
2596  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2597  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2598  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2599  */
2600 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2601 {
2602   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2603   PetscErrorCode ierr;
2604   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2605   PetscInt       nz = a->i[m],row,*jj,mr,col;
2606   PetscInt       *cspidx;
2607 
2608   PetscFunctionBegin;
2609   *nn = n;
2610   if (!ia) PetscFunctionReturn(0);
2611 
2612   ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2613   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2614   ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2615   ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr);
2616   jj   = a->j;
2617   for (i=0; i<nz; i++) {
2618     collengths[jj[i]]++;
2619   }
2620   cia[0] = oshift;
2621   for (i=0; i<n; i++) {
2622     cia[i+1] = cia[i] + collengths[i];
2623   }
2624   ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2625   jj   = a->j;
2626   for (row=0; row<m; row++) {
2627     mr = a->i[row+1] - a->i[row];
2628     for (i=0; i<mr; i++) {
2629       col = *jj++;
2630       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2631       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2632     }
2633   }
2634   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2635   *ia    = cia;
2636   *ja    = cja;
2637   *spidx = cspidx;
2638   PetscFunctionReturn(0);
2639 }
2640 
2641 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2642 {
2643   PetscErrorCode ierr;
2644 
2645   PetscFunctionBegin;
2646   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2647   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2648   PetscFunctionReturn(0);
2649 }
2650 
2651 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2652 {
2653   PetscErrorCode ierr;
2654   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2655 
2656   PetscFunctionBegin;
2657   if (!Y->preallocated || !aij->nz) {
2658     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2659   }
2660   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2661   PetscFunctionReturn(0);
2662 }
2663 
2664 /* -------------------------------------------------------------------*/
2665 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2666                                        MatGetRow_SeqBAIJ,
2667                                        MatRestoreRow_SeqBAIJ,
2668                                        MatMult_SeqBAIJ_N,
2669                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2670                                        MatMultTranspose_SeqBAIJ,
2671                                        MatMultTransposeAdd_SeqBAIJ,
2672                                        NULL,
2673                                        NULL,
2674                                        NULL,
2675                                /* 10*/ NULL,
2676                                        MatLUFactor_SeqBAIJ,
2677                                        NULL,
2678                                        NULL,
2679                                        MatTranspose_SeqBAIJ,
2680                                /* 15*/ MatGetInfo_SeqBAIJ,
2681                                        MatEqual_SeqBAIJ,
2682                                        MatGetDiagonal_SeqBAIJ,
2683                                        MatDiagonalScale_SeqBAIJ,
2684                                        MatNorm_SeqBAIJ,
2685                                /* 20*/ NULL,
2686                                        MatAssemblyEnd_SeqBAIJ,
2687                                        MatSetOption_SeqBAIJ,
2688                                        MatZeroEntries_SeqBAIJ,
2689                                /* 24*/ MatZeroRows_SeqBAIJ,
2690                                        NULL,
2691                                        NULL,
2692                                        NULL,
2693                                        NULL,
2694                                /* 29*/ MatSetUp_SeqBAIJ,
2695                                        NULL,
2696                                        NULL,
2697                                        NULL,
2698                                        NULL,
2699                                /* 34*/ MatDuplicate_SeqBAIJ,
2700                                        NULL,
2701                                        NULL,
2702                                        MatILUFactor_SeqBAIJ,
2703                                        NULL,
2704                                /* 39*/ MatAXPY_SeqBAIJ,
2705                                        MatCreateSubMatrices_SeqBAIJ,
2706                                        MatIncreaseOverlap_SeqBAIJ,
2707                                        MatGetValues_SeqBAIJ,
2708                                        MatCopy_SeqBAIJ,
2709                                /* 44*/ NULL,
2710                                        MatScale_SeqBAIJ,
2711                                        MatShift_SeqBAIJ,
2712                                        NULL,
2713                                        MatZeroRowsColumns_SeqBAIJ,
2714                                /* 49*/ NULL,
2715                                        MatGetRowIJ_SeqBAIJ,
2716                                        MatRestoreRowIJ_SeqBAIJ,
2717                                        MatGetColumnIJ_SeqBAIJ,
2718                                        MatRestoreColumnIJ_SeqBAIJ,
2719                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2720                                        NULL,
2721                                        NULL,
2722                                        NULL,
2723                                        MatSetValuesBlocked_SeqBAIJ,
2724                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2725                                        MatDestroy_SeqBAIJ,
2726                                        MatView_SeqBAIJ,
2727                                        NULL,
2728                                        NULL,
2729                                /* 64*/ NULL,
2730                                        NULL,
2731                                        NULL,
2732                                        NULL,
2733                                        NULL,
2734                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2735                                        NULL,
2736                                        MatConvert_Basic,
2737                                        NULL,
2738                                        NULL,
2739                                /* 74*/ NULL,
2740                                        MatFDColoringApply_BAIJ,
2741                                        NULL,
2742                                        NULL,
2743                                        NULL,
2744                                /* 79*/ NULL,
2745                                        NULL,
2746                                        NULL,
2747                                        NULL,
2748                                        MatLoad_SeqBAIJ,
2749                                /* 84*/ NULL,
2750                                        NULL,
2751                                        NULL,
2752                                        NULL,
2753                                        NULL,
2754                                /* 89*/ NULL,
2755                                        NULL,
2756                                        NULL,
2757                                        NULL,
2758                                        NULL,
2759                                /* 94*/ NULL,
2760                                        NULL,
2761                                        NULL,
2762                                        NULL,
2763                                        NULL,
2764                                /* 99*/ NULL,
2765                                        NULL,
2766                                        NULL,
2767                                        MatConjugate_SeqBAIJ,
2768                                        NULL,
2769                                /*104*/ NULL,
2770                                        MatRealPart_SeqBAIJ,
2771                                        MatImaginaryPart_SeqBAIJ,
2772                                        NULL,
2773                                        NULL,
2774                                /*109*/ NULL,
2775                                        NULL,
2776                                        NULL,
2777                                        NULL,
2778                                        MatMissingDiagonal_SeqBAIJ,
2779                                /*114*/ NULL,
2780                                        NULL,
2781                                        NULL,
2782                                        NULL,
2783                                        NULL,
2784                                /*119*/ NULL,
2785                                        NULL,
2786                                        MatMultHermitianTranspose_SeqBAIJ,
2787                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2788                                        NULL,
2789                                /*124*/ NULL,
2790                                        MatGetColumnReductions_SeqBAIJ,
2791                                        MatInvertBlockDiagonal_SeqBAIJ,
2792                                        NULL,
2793                                        NULL,
2794                                /*129*/ NULL,
2795                                        NULL,
2796                                        NULL,
2797                                        NULL,
2798                                        NULL,
2799                                /*134*/ NULL,
2800                                        NULL,
2801                                        NULL,
2802                                        NULL,
2803                                        NULL,
2804                                /*139*/ MatSetBlockSizes_Default,
2805                                        NULL,
2806                                        NULL,
2807                                        MatFDColoringSetUp_SeqXAIJ,
2808                                        NULL,
2809                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2810                                        MatDestroySubMatrices_SeqBAIJ
2811 };
2812 
2813 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2814 {
2815   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2816   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2817   PetscErrorCode ierr;
2818 
2819   PetscFunctionBegin;
2820   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2821 
2822   /* allocate space for values if not already there */
2823   if (!aij->saved_values) {
2824     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2825     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2826   }
2827 
2828   /* copy values over */
2829   ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr);
2830   PetscFunctionReturn(0);
2831 }
2832 
2833 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2834 {
2835   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2836   PetscErrorCode ierr;
2837   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2838 
2839   PetscFunctionBegin;
2840   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2841   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2842 
2843   /* copy values over */
2844   ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr);
2845   PetscFunctionReturn(0);
2846 }
2847 
2848 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2849 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2850 
2851 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2852 {
2853   Mat_SeqBAIJ    *b;
2854   PetscErrorCode ierr;
2855   PetscInt       i,mbs,nbs,bs2;
2856   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2857 
2858   PetscFunctionBegin;
2859   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2860   if (nz == MAT_SKIP_ALLOCATION) {
2861     skipallocation = PETSC_TRUE;
2862     nz             = 0;
2863   }
2864 
2865   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2866   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2867   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2868   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2869 
2870   B->preallocated = PETSC_TRUE;
2871 
2872   mbs = B->rmap->n/bs;
2873   nbs = B->cmap->n/bs;
2874   bs2 = bs*bs;
2875 
2876   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2877 
2878   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2879   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2880   if (nnz) {
2881     for (i=0; i<mbs; i++) {
2882       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2883       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2884     }
2885   }
2886 
2887   b    = (Mat_SeqBAIJ*)B->data;
2888   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2889   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2890   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2891 
2892   if (!flg) {
2893     switch (bs) {
2894     case 1:
2895       B->ops->mult    = MatMult_SeqBAIJ_1;
2896       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2897       break;
2898     case 2:
2899       B->ops->mult    = MatMult_SeqBAIJ_2;
2900       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2901       break;
2902     case 3:
2903       B->ops->mult    = MatMult_SeqBAIJ_3;
2904       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2905       break;
2906     case 4:
2907       B->ops->mult    = MatMult_SeqBAIJ_4;
2908       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2909       break;
2910     case 5:
2911       B->ops->mult    = MatMult_SeqBAIJ_5;
2912       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2913       break;
2914     case 6:
2915       B->ops->mult    = MatMult_SeqBAIJ_6;
2916       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2917       break;
2918     case 7:
2919       B->ops->mult    = MatMult_SeqBAIJ_7;
2920       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2921       break;
2922     case 9:
2923     {
2924       PetscInt version = 1;
2925       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
2926       switch (version) {
2927 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2928       case 1:
2929         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2930         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2931         ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2932         break;
2933 #endif
2934       default:
2935         B->ops->mult    = MatMult_SeqBAIJ_N;
2936         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2937         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2938         break;
2939       }
2940       break;
2941     }
2942     case 11:
2943       B->ops->mult    = MatMult_SeqBAIJ_11;
2944       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2945       break;
2946     case 12:
2947     {
2948       PetscInt version = 1;
2949       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
2950       switch (version) {
2951       case 1:
2952         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
2953         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2954         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2955         break;
2956       case 2:
2957         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
2958         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
2959         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2960         break;
2961 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2962       case 3:
2963         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
2964         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2965         ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2966         break;
2967 #endif
2968       default:
2969         B->ops->mult    = MatMult_SeqBAIJ_N;
2970         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2971         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2972         break;
2973       }
2974       break;
2975     }
2976     case 15:
2977     {
2978       PetscInt version = 1;
2979       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
2980       switch (version) {
2981       case 1:
2982         B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2983         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2984         break;
2985       case 2:
2986         B->ops->mult    = MatMult_SeqBAIJ_15_ver2;
2987         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2988         break;
2989       case 3:
2990         B->ops->mult    = MatMult_SeqBAIJ_15_ver3;
2991         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2992         break;
2993       case 4:
2994         B->ops->mult    = MatMult_SeqBAIJ_15_ver4;
2995         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2996         break;
2997       default:
2998         B->ops->mult    = MatMult_SeqBAIJ_N;
2999         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
3000         break;
3001       }
3002       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3003       break;
3004     }
3005     default:
3006       B->ops->mult    = MatMult_SeqBAIJ_N;
3007       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3008       ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
3009       break;
3010     }
3011   }
3012   B->ops->sor = MatSOR_SeqBAIJ;
3013   b->mbs = mbs;
3014   b->nbs = nbs;
3015   if (!skipallocation) {
3016     if (!b->imax) {
3017       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
3018       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3019 
3020       b->free_imax_ilen = PETSC_TRUE;
3021     }
3022     /* b->ilen will count nonzeros in each block row so far. */
3023     for (i=0; i<mbs; i++) b->ilen[i] = 0;
3024     if (!nnz) {
3025       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3026       else if (nz < 0) nz = 1;
3027       nz = PetscMin(nz,nbs);
3028       for (i=0; i<mbs; i++) b->imax[i] = nz;
3029       ierr = PetscIntMultError(nz,mbs,&nz);CHKERRQ(ierr);
3030     } else {
3031       PetscInt64 nz64 = 0;
3032       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
3033       ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr);
3034     }
3035 
3036     /* allocate the matrix space */
3037     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3038     if (B->structure_only) {
3039       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
3040       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
3041       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
3042     } else {
3043       PetscInt nzbs2 = 0;
3044       ierr = PetscIntMultError(nz,bs2,&nzbs2);CHKERRQ(ierr);
3045       ierr = PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
3046       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3047       ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr);
3048     }
3049     ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr);
3050 
3051     if (B->structure_only) {
3052       b->singlemalloc = PETSC_FALSE;
3053       b->free_a       = PETSC_FALSE;
3054     } else {
3055       b->singlemalloc = PETSC_TRUE;
3056       b->free_a       = PETSC_TRUE;
3057     }
3058     b->free_ij = PETSC_TRUE;
3059 
3060     b->i[0] = 0;
3061     for (i=1; i<mbs+1; i++) {
3062       b->i[i] = b->i[i-1] + b->imax[i-1];
3063     }
3064 
3065   } else {
3066     b->free_a  = PETSC_FALSE;
3067     b->free_ij = PETSC_FALSE;
3068   }
3069 
3070   b->bs2              = bs2;
3071   b->mbs              = mbs;
3072   b->nz               = 0;
3073   b->maxnz            = nz;
3074   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3075   B->was_assembled    = PETSC_FALSE;
3076   B->assembled        = PETSC_FALSE;
3077   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3078   PetscFunctionReturn(0);
3079 }
3080 
3081 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3082 {
3083   PetscInt       i,m,nz,nz_max=0,*nnz;
3084   PetscScalar    *values=NULL;
3085   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
3086   PetscErrorCode ierr;
3087 
3088   PetscFunctionBegin;
3089   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3090   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3091   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3092   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3093   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3094   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3095   m    = B->rmap->n/bs;
3096 
3097   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3098   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
3099   for (i=0; i<m; i++) {
3100     nz = ii[i+1]- ii[i];
3101     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3102     nz_max = PetscMax(nz_max, nz);
3103     nnz[i] = nz;
3104   }
3105   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3106   ierr = PetscFree(nnz);CHKERRQ(ierr);
3107 
3108   values = (PetscScalar*)V;
3109   if (!values) {
3110     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
3111   }
3112   for (i=0; i<m; i++) {
3113     PetscInt          ncols  = ii[i+1] - ii[i];
3114     const PetscInt    *icols = jj + ii[i];
3115     if (bs == 1 || !roworiented) {
3116       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3117       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3118     } else {
3119       PetscInt j;
3120       for (j=0; j<ncols; j++) {
3121         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3122         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
3123       }
3124     }
3125   }
3126   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3127   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3128   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3129   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3130   PetscFunctionReturn(0);
3131 }
3132 
3133 /*@C
3134    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
3135 
3136    Not Collective
3137 
3138    Input Parameter:
3139 .  mat - a MATSEQBAIJ matrix
3140 
3141    Output Parameter:
3142 .   array - pointer to the data
3143 
3144    Level: intermediate
3145 
3146 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3147 @*/
3148 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3149 {
3150   PetscErrorCode ierr;
3151 
3152   PetscFunctionBegin;
3153   ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3154   PetscFunctionReturn(0);
3155 }
3156 
3157 /*@C
3158    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3159 
3160    Not Collective
3161 
3162    Input Parameters:
3163 +  mat - a MATSEQBAIJ matrix
3164 -  array - pointer to the data
3165 
3166    Level: intermediate
3167 
3168 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3169 @*/
3170 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3171 {
3172   PetscErrorCode ierr;
3173 
3174   PetscFunctionBegin;
3175   ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3176   PetscFunctionReturn(0);
3177 }
3178 
3179 /*MC
3180    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3181    block sparse compressed row format.
3182 
3183    Options Database Keys:
3184 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3185 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3186 
3187    Level: beginner
3188 
3189    Notes:
3190     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3191     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
3192 
3193    Run with -info to see what version of the matrix-vector product is being used
3194 
3195 .seealso: MatCreateSeqBAIJ()
3196 M*/
3197 
3198 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3199 
3200 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3201 {
3202   PetscErrorCode ierr;
3203   PetscMPIInt    size;
3204   Mat_SeqBAIJ    *b;
3205 
3206   PetscFunctionBegin;
3207   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
3208   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3209 
3210   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3211   B->data = (void*)b;
3212   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3213 
3214   b->row          = NULL;
3215   b->col          = NULL;
3216   b->icol         = NULL;
3217   b->reallocs     = 0;
3218   b->saved_values = NULL;
3219 
3220   b->roworiented        = PETSC_TRUE;
3221   b->nonew              = 0;
3222   b->diag               = NULL;
3223   B->spptr              = NULL;
3224   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3225   b->keepnonzeropattern = PETSC_FALSE;
3226 
3227   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr);
3228   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr);
3229   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3230   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3231   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3232   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3233   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3234   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3235   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3236   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3237 #if defined(PETSC_HAVE_HYPRE)
3238   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3239 #endif
3240   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3241   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3242   PetscFunctionReturn(0);
3243 }
3244 
3245 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3246 {
3247   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3248   PetscErrorCode ierr;
3249   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3250 
3251   PetscFunctionBegin;
3252   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3253 
3254   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3255     c->imax           = a->imax;
3256     c->ilen           = a->ilen;
3257     c->free_imax_ilen = PETSC_FALSE;
3258   } else {
3259     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3260     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3261     for (i=0; i<mbs; i++) {
3262       c->imax[i] = a->imax[i];
3263       c->ilen[i] = a->ilen[i];
3264     }
3265     c->free_imax_ilen = PETSC_TRUE;
3266   }
3267 
3268   /* allocate the matrix space */
3269   if (mallocmatspace) {
3270     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3271       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3272       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3273 
3274       c->i            = a->i;
3275       c->j            = a->j;
3276       c->singlemalloc = PETSC_FALSE;
3277       c->free_a       = PETSC_TRUE;
3278       c->free_ij      = PETSC_FALSE;
3279       c->parent       = A;
3280       C->preallocated = PETSC_TRUE;
3281       C->assembled    = PETSC_TRUE;
3282 
3283       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3284       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3285       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3286     } else {
3287       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3288       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3289 
3290       c->singlemalloc = PETSC_TRUE;
3291       c->free_a       = PETSC_TRUE;
3292       c->free_ij      = PETSC_TRUE;
3293 
3294       ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr);
3295       if (mbs > 0) {
3296         ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr);
3297         if (cpvalues == MAT_COPY_VALUES) {
3298           ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr);
3299         } else {
3300           ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr);
3301         }
3302       }
3303       C->preallocated = PETSC_TRUE;
3304       C->assembled    = PETSC_TRUE;
3305     }
3306   }
3307 
3308   c->roworiented = a->roworiented;
3309   c->nonew       = a->nonew;
3310 
3311   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3312   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3313 
3314   c->bs2         = a->bs2;
3315   c->mbs         = a->mbs;
3316   c->nbs         = a->nbs;
3317 
3318   if (a->diag) {
3319     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3320       c->diag      = a->diag;
3321       c->free_diag = PETSC_FALSE;
3322     } else {
3323       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3324       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3325       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3326       c->free_diag = PETSC_TRUE;
3327     }
3328   } else c->diag = NULL;
3329 
3330   c->nz         = a->nz;
3331   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3332   c->solve_work = NULL;
3333   c->mult_work  = NULL;
3334   c->sor_workt  = NULL;
3335   c->sor_work   = NULL;
3336 
3337   c->compressedrow.use   = a->compressedrow.use;
3338   c->compressedrow.nrows = a->compressedrow.nrows;
3339   if (a->compressedrow.use) {
3340     i    = a->compressedrow.nrows;
3341     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3342     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3343     ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr);
3344     ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr);
3345   } else {
3346     c->compressedrow.use    = PETSC_FALSE;
3347     c->compressedrow.i      = NULL;
3348     c->compressedrow.rindex = NULL;
3349   }
3350   C->nonzerostate = A->nonzerostate;
3351 
3352   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3353   PetscFunctionReturn(0);
3354 }
3355 
3356 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3357 {
3358   PetscErrorCode ierr;
3359 
3360   PetscFunctionBegin;
3361   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3362   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3363   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3364   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3365   PetscFunctionReturn(0);
3366 }
3367 
3368 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3369 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3370 {
3371   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3372   PetscInt       *rowidxs,*colidxs;
3373   PetscScalar    *matvals;
3374   PetscErrorCode ierr;
3375 
3376   PetscFunctionBegin;
3377   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3378 
3379   /* read matrix header */
3380   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3381   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3382   M = header[1]; N = header[2]; nz = header[3];
3383   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3384   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3385   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ");
3386 
3387   /* set block sizes from the viewer's .info file */
3388   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3389   /* set local and global sizes if not set already */
3390   if (mat->rmap->n < 0) mat->rmap->n = M;
3391   if (mat->cmap->n < 0) mat->cmap->n = N;
3392   if (mat->rmap->N < 0) mat->rmap->N = M;
3393   if (mat->cmap->N < 0) mat->cmap->N = N;
3394   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3395   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3396 
3397   /* check if the matrix sizes are correct */
3398   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3399   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3400   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3401   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3402   mbs = m/bs; nbs = n/bs;
3403 
3404   /* read in row lengths, column indices and nonzero values */
3405   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3406   ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr);
3407   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3408   sum = rowidxs[m];
3409   if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3410 
3411   /* read in column indices and nonzero values */
3412   ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr);
3413   ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr);
3414   ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr);
3415 
3416   { /* preallocate matrix storage */
3417     PetscBT   bt; /* helper bit set to count nonzeros */
3418     PetscInt  *nnz;
3419     PetscBool sbaij;
3420 
3421     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3422     ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr);
3423     ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr);
3424     for (i=0; i<mbs; i++) {
3425       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3426       for (k=0; k<bs; k++) {
3427         PetscInt row = bs*i + k;
3428         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3429           PetscInt col = colidxs[j];
3430           if (!sbaij || col >= row)
3431             if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3432         }
3433       }
3434     }
3435     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3436     ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3437     ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3438     ierr = PetscFree(nnz);CHKERRQ(ierr);
3439   }
3440 
3441   /* store matrix values */
3442   for (i=0; i<m; i++) {
3443     PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3444     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3445   }
3446 
3447   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3448   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3449   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3450   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3451   PetscFunctionReturn(0);
3452 }
3453 
3454 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3455 {
3456   PetscErrorCode ierr;
3457   PetscBool      isbinary;
3458 
3459   PetscFunctionBegin;
3460   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3461   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3462   ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3463   PetscFunctionReturn(0);
3464 }
3465 
3466 /*@C
3467    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3468    compressed row) format.  For good matrix assembly performance the
3469    user should preallocate the matrix storage by setting the parameter nz
3470    (or the array nnz).  By setting these parameters accurately, performance
3471    during matrix assembly can be increased by more than a factor of 50.
3472 
3473    Collective
3474 
3475    Input Parameters:
3476 +  comm - MPI communicator, set to PETSC_COMM_SELF
3477 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3478           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3479 .  m - number of rows
3480 .  n - number of columns
3481 .  nz - number of nonzero blocks  per block row (same for all rows)
3482 -  nnz - array containing the number of nonzero blocks in the various block rows
3483          (possibly different for each block row) or NULL
3484 
3485    Output Parameter:
3486 .  A - the matrix
3487 
3488    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3489    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3490    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3491 
3492    Options Database Keys:
3493 +   -mat_no_unroll - uses code that does not unroll the loops in the
3494                      block calculations (much slower)
3495 -    -mat_block_size - size of the blocks to use
3496 
3497    Level: intermediate
3498 
3499    Notes:
3500    The number of rows and columns must be divisible by blocksize.
3501 
3502    If the nnz parameter is given then the nz parameter is ignored
3503 
3504    A nonzero block is any block that as 1 or more nonzeros in it
3505 
3506    The block AIJ format is fully compatible with standard Fortran 77
3507    storage.  That is, the stored row and column indices can begin at
3508    either one (as in Fortran) or zero.  See the users' manual for details.
3509 
3510    Specify the preallocated storage with either nz or nnz (not both).
3511    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3512    allocation.  See Users-Manual: ch_mat for details.
3513    matrices.
3514 
3515 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3516 @*/
3517 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3518 {
3519   PetscErrorCode ierr;
3520 
3521   PetscFunctionBegin;
3522   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3523   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3524   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3525   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3526   PetscFunctionReturn(0);
3527 }
3528 
3529 /*@C
3530    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3531    per row in the matrix. For good matrix assembly performance the
3532    user should preallocate the matrix storage by setting the parameter nz
3533    (or the array nnz).  By setting these parameters accurately, performance
3534    during matrix assembly can be increased by more than a factor of 50.
3535 
3536    Collective
3537 
3538    Input Parameters:
3539 +  B - the matrix
3540 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3541           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3542 .  nz - number of block nonzeros per block row (same for all rows)
3543 -  nnz - array containing the number of block nonzeros in the various block rows
3544          (possibly different for each block row) or NULL
3545 
3546    Options Database Keys:
3547 +   -mat_no_unroll - uses code that does not unroll the loops in the
3548                      block calculations (much slower)
3549 -   -mat_block_size - size of the blocks to use
3550 
3551    Level: intermediate
3552 
3553    Notes:
3554    If the nnz parameter is given then the nz parameter is ignored
3555 
3556    You can call MatGetInfo() to get information on how effective the preallocation was;
3557    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3558    You can also run with the option -info and look for messages with the string
3559    malloc in them to see if additional memory allocation was needed.
3560 
3561    The block AIJ format is fully compatible with standard Fortran 77
3562    storage.  That is, the stored row and column indices can begin at
3563    either one (as in Fortran) or zero.  See the users' manual for details.
3564 
3565    Specify the preallocated storage with either nz or nnz (not both).
3566    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3567    allocation.  See Users-Manual: ch_mat for details.
3568 
3569 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3570 @*/
3571 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3572 {
3573   PetscErrorCode ierr;
3574 
3575   PetscFunctionBegin;
3576   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3577   PetscValidType(B,1);
3578   PetscValidLogicalCollectiveInt(B,bs,2);
3579   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3580   PetscFunctionReturn(0);
3581 }
3582 
3583 /*@C
3584    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3585 
3586    Collective
3587 
3588    Input Parameters:
3589 +  B - the matrix
3590 .  i - the indices into j for the start of each local row (starts with zero)
3591 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3592 -  v - optional values in the matrix
3593 
3594    Level: advanced
3595 
3596    Notes:
3597    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3598    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3599    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3600    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3601    block column and the second index is over columns within a block.
3602 
3603    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3604 
3605 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3606 @*/
3607 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3608 {
3609   PetscErrorCode ierr;
3610 
3611   PetscFunctionBegin;
3612   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3613   PetscValidType(B,1);
3614   PetscValidLogicalCollectiveInt(B,bs,2);
3615   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3616   PetscFunctionReturn(0);
3617 }
3618 
3619 /*@
3620      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3621 
3622      Collective
3623 
3624    Input Parameters:
3625 +  comm - must be an MPI communicator of size 1
3626 .  bs - size of block
3627 .  m - number of rows
3628 .  n - number of columns
3629 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3630 .  j - column indices
3631 -  a - matrix values
3632 
3633    Output Parameter:
3634 .  mat - the matrix
3635 
3636    Level: advanced
3637 
3638    Notes:
3639        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3640     once the matrix is destroyed
3641 
3642        You cannot set new nonzero locations into this matrix, that will generate an error.
3643 
3644        The i and j indices are 0 based
3645 
3646        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3647 
3648       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3649       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3650       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3651       with column-major ordering within blocks.
3652 
3653 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3654 
3655 @*/
3656 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3657 {
3658   PetscErrorCode ierr;
3659   PetscInt       ii;
3660   Mat_SeqBAIJ    *baij;
3661 
3662   PetscFunctionBegin;
3663   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3664   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3665 
3666   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3667   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3668   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3669   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
3670   baij = (Mat_SeqBAIJ*)(*mat)->data;
3671   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3672   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3673 
3674   baij->i = i;
3675   baij->j = j;
3676   baij->a = a;
3677 
3678   baij->singlemalloc = PETSC_FALSE;
3679   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3680   baij->free_a       = PETSC_FALSE;
3681   baij->free_ij      = PETSC_FALSE;
3682 
3683   for (ii=0; ii<m; ii++) {
3684     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3685     if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3686   }
3687   if (PetscDefined(USE_DEBUG)) {
3688     for (ii=0; ii<baij->i[m]; ii++) {
3689       if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3690       if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3691     }
3692   }
3693 
3694   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3695   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3696   PetscFunctionReturn(0);
3697 }
3698 
3699 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3700 {
3701   PetscErrorCode ierr;
3702   PetscMPIInt    size;
3703 
3704   PetscFunctionBegin;
3705   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3706   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3707     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3708   } else {
3709     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3710   }
3711   PetscFunctionReturn(0);
3712 }
3713