xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 91e63d38360eb9bc922f79d792328cc4769c01ac)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 
20 PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A,PetscInt type,PetscReal *reductions)
21 {
22   PetscErrorCode ierr;
23   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) A->data;
24   PetscInt       m,n,i;
25   PetscInt       ib,jb,bs = A->rmap->bs;
26   MatScalar      *a_val = a_aij->a;
27 
28   PetscFunctionBegin;
29   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
30   for (i=0; i<n; i++) reductions[i] = 0.0;
31   if (type == NORM_2) {
32     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
33       for (jb=0; jb<bs; jb++) {
34         for (ib=0; ib<bs; ib++) {
35           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
36           a_val++;
37         }
38       }
39     }
40   } else if (type == NORM_1) {
41     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
42       for (jb=0; jb<bs; jb++) {
43         for (ib=0; ib<bs; ib++) {
44           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
45           a_val++;
46         }
47       }
48     }
49   } else if (type == NORM_INFINITY) {
50     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
51       for (jb=0; jb<bs; jb++) {
52         for (ib=0; ib<bs; ib++) {
53           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
54           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
55           a_val++;
56         }
57       }
58     }
59   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
60     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
61       for (jb=0; jb<bs; jb++) {
62         for (ib=0; ib<bs; ib++) {
63           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
64           a_val++;
65         }
66       }
67     }
68   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
69     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
70       for (jb=0; jb<bs; jb++) {
71         for (ib=0; ib<bs; ib++) {
72           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
73           a_val++;
74         }
75       }
76     }
77   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
78   if (type == NORM_2) {
79     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
80   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
81     for (i=0; i<n; i++) reductions[i] /= m;
82   }
83   PetscFunctionReturn(0);
84 }
85 
86 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
87 {
88   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
89   PetscErrorCode ierr;
90   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
91   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
92   PetscReal      shift = 0.0;
93   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
94 
95   PetscFunctionBegin;
96   allowzeropivot = PetscNot(A->erroriffailure);
97 
98   if (a->idiagvalid) {
99     if (values) *values = a->idiag;
100     PetscFunctionReturn(0);
101   }
102   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
103   diag_offset = a->diag;
104   if (!a->idiag) {
105     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
106     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
107   }
108   diag  = a->idiag;
109   if (values) *values = a->idiag;
110   /* factor and invert each block */
111   switch (bs) {
112   case 1:
113     for (i=0; i<mbs; i++) {
114       odiag    = v + 1*diag_offset[i];
115       diag[0]  = odiag[0];
116 
117       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
118         if (allowzeropivot) {
119           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
120           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
121           A->factorerror_zeropivot_row   = i;
122           ierr = PetscInfo(A,"Zero pivot, row %" PetscInt_FMT "\n",i);CHKERRQ(ierr);
123         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
124       }
125 
126       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
127       diag    += 1;
128     }
129     break;
130   case 2:
131     for (i=0; i<mbs; i++) {
132       odiag    = v + 4*diag_offset[i];
133       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
134       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
135       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
136       diag    += 4;
137     }
138     break;
139   case 3:
140     for (i=0; i<mbs; i++) {
141       odiag    = v + 9*diag_offset[i];
142       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
143       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
144       diag[8]  = odiag[8];
145       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
146       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
147       diag    += 9;
148     }
149     break;
150   case 4:
151     for (i=0; i<mbs; i++) {
152       odiag  = v + 16*diag_offset[i];
153       ierr   = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr);
154       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
155       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
156       diag  += 16;
157     }
158     break;
159   case 5:
160     for (i=0; i<mbs; i++) {
161       odiag  = v + 25*diag_offset[i];
162       ierr   = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr);
163       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
164       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
165       diag  += 25;
166     }
167     break;
168   case 6:
169     for (i=0; i<mbs; i++) {
170       odiag  = v + 36*diag_offset[i];
171       ierr   = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr);
172       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
173       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
174       diag  += 36;
175     }
176     break;
177   case 7:
178     for (i=0; i<mbs; i++) {
179       odiag  = v + 49*diag_offset[i];
180       ierr   = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr);
181       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
182       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
183       diag  += 49;
184     }
185     break;
186   default:
187     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
188     for (i=0; i<mbs; i++) {
189       odiag  = v + bs2*diag_offset[i];
190       ierr   = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr);
191       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
192       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
193       diag  += bs2;
194     }
195     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
196   }
197   a->idiagvalid = PETSC_TRUE;
198   PetscFunctionReturn(0);
199 }
200 
201 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
202 {
203   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
204   PetscScalar       *x,*work,*w,*workt,*t;
205   const MatScalar   *v,*aa = a->a, *idiag;
206   const PetscScalar *b,*xb;
207   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
208   PetscErrorCode    ierr;
209   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
210   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
211 
212   PetscFunctionBegin;
213   its = its*lits;
214   PetscCheckFalse(flag & SOR_EISENSTAT,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
215   PetscCheckFalse(its <= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
216   PetscCheckFalse(fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
217   PetscCheckFalse(omega != 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor");
218   PetscCheckFalse((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
219 
220   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
221 
222   if (!m) PetscFunctionReturn(0);
223   diag  = a->diag;
224   idiag = a->idiag;
225   k    = PetscMax(A->rmap->n,A->cmap->n);
226   if (!a->mult_work) {
227     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
228   }
229   if (!a->sor_workt) {
230     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
231   }
232   if (!a->sor_work) {
233     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
234   }
235   work = a->mult_work;
236   t    = a->sor_workt;
237   w    = a->sor_work;
238 
239   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
240   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
241 
242   if (flag & SOR_ZERO_INITIAL_GUESS) {
243     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
244       switch (bs) {
245       case 1:
246         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
247         t[0] = b[0];
248         i2     = 1;
249         idiag += 1;
250         for (i=1; i<m; i++) {
251           v  = aa + ai[i];
252           vi = aj + ai[i];
253           nz = diag[i] - ai[i];
254           s[0] = b[i2];
255           for (j=0; j<nz; j++) {
256             xw[0] = x[vi[j]];
257             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
258           }
259           t[i2] = s[0];
260           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
261           x[i2]  = xw[0];
262           idiag += 1;
263           i2    += 1;
264         }
265         break;
266       case 2:
267         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
268         t[0] = b[0]; t[1] = b[1];
269         i2     = 2;
270         idiag += 4;
271         for (i=1; i<m; i++) {
272           v  = aa + 4*ai[i];
273           vi = aj + ai[i];
274           nz = diag[i] - ai[i];
275           s[0] = b[i2]; s[1] = b[i2+1];
276           for (j=0; j<nz; j++) {
277             idx = 2*vi[j];
278             it  = 4*j;
279             xw[0] = x[idx]; xw[1] = x[1+idx];
280             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
281           }
282           t[i2] = s[0]; t[i2+1] = s[1];
283           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
284           x[i2]   = xw[0]; x[i2+1] = xw[1];
285           idiag  += 4;
286           i2     += 2;
287         }
288         break;
289       case 3:
290         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
291         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
292         i2     = 3;
293         idiag += 9;
294         for (i=1; i<m; i++) {
295           v  = aa + 9*ai[i];
296           vi = aj + ai[i];
297           nz = diag[i] - ai[i];
298           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
299           while (nz--) {
300             idx = 3*(*vi++);
301             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
302             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
303             v  += 9;
304           }
305           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
306           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
307           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
308           idiag  += 9;
309           i2     += 3;
310         }
311         break;
312       case 4:
313         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
314         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
315         i2     = 4;
316         idiag += 16;
317         for (i=1; i<m; i++) {
318           v  = aa + 16*ai[i];
319           vi = aj + ai[i];
320           nz = diag[i] - ai[i];
321           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
322           while (nz--) {
323             idx = 4*(*vi++);
324             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
325             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
326             v  += 16;
327           }
328           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
329           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
330           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
331           idiag  += 16;
332           i2     += 4;
333         }
334         break;
335       case 5:
336         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
337         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
338         i2     = 5;
339         idiag += 25;
340         for (i=1; i<m; i++) {
341           v  = aa + 25*ai[i];
342           vi = aj + ai[i];
343           nz = diag[i] - ai[i];
344           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
345           while (nz--) {
346             idx = 5*(*vi++);
347             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
348             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
349             v  += 25;
350           }
351           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
352           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
353           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
354           idiag  += 25;
355           i2     += 5;
356         }
357         break;
358       case 6:
359         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
360         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
361         i2     = 6;
362         idiag += 36;
363         for (i=1; i<m; i++) {
364           v  = aa + 36*ai[i];
365           vi = aj + ai[i];
366           nz = diag[i] - ai[i];
367           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
368           while (nz--) {
369             idx = 6*(*vi++);
370             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
371             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
372             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
373             v  += 36;
374           }
375           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
376           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
377           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
378           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
379           idiag  += 36;
380           i2     += 6;
381         }
382         break;
383       case 7:
384         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
385         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
386         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
387         i2     = 7;
388         idiag += 49;
389         for (i=1; i<m; i++) {
390           v  = aa + 49*ai[i];
391           vi = aj + ai[i];
392           nz = diag[i] - ai[i];
393           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
394           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
395           while (nz--) {
396             idx = 7*(*vi++);
397             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
398             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
399             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
400             v  += 49;
401           }
402           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
403           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
404           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
405           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
406           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
407           idiag  += 49;
408           i2     += 7;
409         }
410         break;
411       default:
412         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
413         ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr);
414         i2     = bs;
415         idiag += bs2;
416         for (i=1; i<m; i++) {
417           v  = aa + bs2*ai[i];
418           vi = aj + ai[i];
419           nz = diag[i] - ai[i];
420 
421           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
422           /* copy all rows of x that are needed into contiguous space */
423           workt = work;
424           for (j=0; j<nz; j++) {
425             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
426             workt += bs;
427           }
428           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
429           ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr);
430           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
431 
432           idiag += bs2;
433           i2    += bs;
434         }
435         break;
436       }
437       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
438       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
439       xb = t;
440     }
441     else xb = b;
442     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
443       idiag = a->idiag+bs2*(a->mbs-1);
444       i2 = bs * (m-1);
445       switch (bs) {
446       case 1:
447         s[0]  = xb[i2];
448         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
449         x[i2] = xw[0];
450         i2   -= 1;
451         for (i=m-2; i>=0; i--) {
452           v  = aa + (diag[i]+1);
453           vi = aj + diag[i] + 1;
454           nz = ai[i+1] - diag[i] - 1;
455           s[0] = xb[i2];
456           for (j=0; j<nz; j++) {
457             xw[0] = x[vi[j]];
458             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
459           }
460           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
461           x[i2]  = xw[0];
462           idiag -= 1;
463           i2    -= 1;
464         }
465         break;
466       case 2:
467         s[0]  = xb[i2]; s[1] = xb[i2+1];
468         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
469         x[i2] = xw[0]; x[i2+1] = xw[1];
470         i2    -= 2;
471         idiag -= 4;
472         for (i=m-2; i>=0; i--) {
473           v  = aa + 4*(diag[i] + 1);
474           vi = aj + diag[i] + 1;
475           nz = ai[i+1] - diag[i] - 1;
476           s[0] = xb[i2]; s[1] = xb[i2+1];
477           for (j=0; j<nz; j++) {
478             idx = 2*vi[j];
479             it  = 4*j;
480             xw[0] = x[idx]; xw[1] = x[1+idx];
481             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
482           }
483           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
484           x[i2]   = xw[0]; x[i2+1] = xw[1];
485           idiag  -= 4;
486           i2     -= 2;
487         }
488         break;
489       case 3:
490         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
491         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
492         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
493         i2    -= 3;
494         idiag -= 9;
495         for (i=m-2; i>=0; i--) {
496           v  = aa + 9*(diag[i]+1);
497           vi = aj + diag[i] + 1;
498           nz = ai[i+1] - diag[i] - 1;
499           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
500           while (nz--) {
501             idx = 3*(*vi++);
502             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
503             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
504             v  += 9;
505           }
506           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
507           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
508           idiag  -= 9;
509           i2     -= 3;
510         }
511         break;
512       case 4:
513         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
514         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
515         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
516         i2    -= 4;
517         idiag -= 16;
518         for (i=m-2; i>=0; i--) {
519           v  = aa + 16*(diag[i]+1);
520           vi = aj + diag[i] + 1;
521           nz = ai[i+1] - diag[i] - 1;
522           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
523           while (nz--) {
524             idx = 4*(*vi++);
525             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
526             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
527             v  += 16;
528           }
529           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
530           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
531           idiag  -= 16;
532           i2     -= 4;
533         }
534         break;
535       case 5:
536         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
537         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
538         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
539         i2    -= 5;
540         idiag -= 25;
541         for (i=m-2; i>=0; i--) {
542           v  = aa + 25*(diag[i]+1);
543           vi = aj + diag[i] + 1;
544           nz = ai[i+1] - diag[i] - 1;
545           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
546           while (nz--) {
547             idx = 5*(*vi++);
548             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
549             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
550             v  += 25;
551           }
552           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
553           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
554           idiag  -= 25;
555           i2     -= 5;
556         }
557         break;
558       case 6:
559         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
560         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
561         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
562         i2    -= 6;
563         idiag -= 36;
564         for (i=m-2; i>=0; i--) {
565           v  = aa + 36*(diag[i]+1);
566           vi = aj + diag[i] + 1;
567           nz = ai[i+1] - diag[i] - 1;
568           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
569           while (nz--) {
570             idx = 6*(*vi++);
571             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
572             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
573             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
574             v  += 36;
575           }
576           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
577           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
578           idiag  -= 36;
579           i2     -= 6;
580         }
581         break;
582       case 7:
583         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
584         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
585         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
586         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
587         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
588         i2    -= 7;
589         idiag -= 49;
590         for (i=m-2; i>=0; i--) {
591           v  = aa + 49*(diag[i]+1);
592           vi = aj + diag[i] + 1;
593           nz = ai[i+1] - diag[i] - 1;
594           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
595           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
596           while (nz--) {
597             idx = 7*(*vi++);
598             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
599             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
600             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
601             v  += 49;
602           }
603           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
604           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
605           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
606           idiag  -= 49;
607           i2     -= 7;
608         }
609         break;
610       default:
611         ierr  = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
612         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
613         i2    -= bs;
614         idiag -= bs2;
615         for (i=m-2; i>=0; i--) {
616           v  = aa + bs2*(diag[i]+1);
617           vi = aj + diag[i] + 1;
618           nz = ai[i+1] - diag[i] - 1;
619 
620           ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
621           /* copy all rows of x that are needed into contiguous space */
622           workt = work;
623           for (j=0; j<nz; j++) {
624             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
625             workt += bs;
626           }
627           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
628           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
629 
630           idiag -= bs2;
631           i2    -= bs;
632         }
633         break;
634       }
635       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
636     }
637     its--;
638   }
639   while (its--) {
640     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
641       idiag = a->idiag;
642       i2 = 0;
643       switch (bs) {
644       case 1:
645         for (i=0; i<m; i++) {
646           v  = aa + ai[i];
647           vi = aj + ai[i];
648           nz = ai[i+1] - ai[i];
649           s[0] = b[i2];
650           for (j=0; j<nz; j++) {
651             xw[0] = x[vi[j]];
652             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
653           }
654           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
655           x[i2] += xw[0];
656           idiag += 1;
657           i2    += 1;
658         }
659         break;
660       case 2:
661         for (i=0; i<m; i++) {
662           v  = aa + 4*ai[i];
663           vi = aj + ai[i];
664           nz = ai[i+1] - ai[i];
665           s[0] = b[i2]; s[1] = b[i2+1];
666           for (j=0; j<nz; j++) {
667             idx = 2*vi[j];
668             it  = 4*j;
669             xw[0] = x[idx]; xw[1] = x[1+idx];
670             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
671           }
672           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
673           x[i2]  += xw[0]; x[i2+1] += xw[1];
674           idiag  += 4;
675           i2     += 2;
676         }
677         break;
678       case 3:
679         for (i=0; i<m; i++) {
680           v  = aa + 9*ai[i];
681           vi = aj + ai[i];
682           nz = ai[i+1] - ai[i];
683           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
684           while (nz--) {
685             idx = 3*(*vi++);
686             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
687             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
688             v  += 9;
689           }
690           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
691           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
692           idiag  += 9;
693           i2     += 3;
694         }
695         break;
696       case 4:
697         for (i=0; i<m; i++) {
698           v  = aa + 16*ai[i];
699           vi = aj + ai[i];
700           nz = ai[i+1] - ai[i];
701           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
702           while (nz--) {
703             idx = 4*(*vi++);
704             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
705             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
706             v  += 16;
707           }
708           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
709           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
710           idiag  += 16;
711           i2     += 4;
712         }
713         break;
714       case 5:
715         for (i=0; i<m; i++) {
716           v  = aa + 25*ai[i];
717           vi = aj + ai[i];
718           nz = ai[i+1] - ai[i];
719           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
720           while (nz--) {
721             idx = 5*(*vi++);
722             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
723             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
724             v  += 25;
725           }
726           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
727           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
728           idiag  += 25;
729           i2     += 5;
730         }
731         break;
732       case 6:
733         for (i=0; i<m; i++) {
734           v  = aa + 36*ai[i];
735           vi = aj + ai[i];
736           nz = ai[i+1] - ai[i];
737           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
738           while (nz--) {
739             idx = 6*(*vi++);
740             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
741             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
742             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
743             v  += 36;
744           }
745           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
746           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
747           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
748           idiag  += 36;
749           i2     += 6;
750         }
751         break;
752       case 7:
753         for (i=0; i<m; i++) {
754           v  = aa + 49*ai[i];
755           vi = aj + ai[i];
756           nz = ai[i+1] - ai[i];
757           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
758           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
759           while (nz--) {
760             idx = 7*(*vi++);
761             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
762             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
763             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
764             v  += 49;
765           }
766           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
767           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
768           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
769           idiag  += 49;
770           i2     += 7;
771         }
772         break;
773       default:
774         for (i=0; i<m; i++) {
775           v  = aa + bs2*ai[i];
776           vi = aj + ai[i];
777           nz = ai[i+1] - ai[i];
778 
779           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
780           /* copy all rows of x that are needed into contiguous space */
781           workt = work;
782           for (j=0; j<nz; j++) {
783             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
784             workt += bs;
785           }
786           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
787           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
788 
789           idiag += bs2;
790           i2    += bs;
791         }
792         break;
793       }
794       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
795     }
796     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
797       idiag = a->idiag+bs2*(a->mbs-1);
798       i2 = bs * (m-1);
799       switch (bs) {
800       case 1:
801         for (i=m-1; i>=0; i--) {
802           v  = aa + ai[i];
803           vi = aj + ai[i];
804           nz = ai[i+1] - ai[i];
805           s[0] = b[i2];
806           for (j=0; j<nz; j++) {
807             xw[0] = x[vi[j]];
808             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
809           }
810           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
811           x[i2] += xw[0];
812           idiag -= 1;
813           i2    -= 1;
814         }
815         break;
816       case 2:
817         for (i=m-1; i>=0; i--) {
818           v  = aa + 4*ai[i];
819           vi = aj + ai[i];
820           nz = ai[i+1] - ai[i];
821           s[0] = b[i2]; s[1] = b[i2+1];
822           for (j=0; j<nz; j++) {
823             idx = 2*vi[j];
824             it  = 4*j;
825             xw[0] = x[idx]; xw[1] = x[1+idx];
826             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
827           }
828           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
829           x[i2]  += xw[0]; x[i2+1] += xw[1];
830           idiag  -= 4;
831           i2     -= 2;
832         }
833         break;
834       case 3:
835         for (i=m-1; i>=0; i--) {
836           v  = aa + 9*ai[i];
837           vi = aj + ai[i];
838           nz = ai[i+1] - ai[i];
839           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
840           while (nz--) {
841             idx = 3*(*vi++);
842             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
843             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
844             v  += 9;
845           }
846           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
847           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
848           idiag  -= 9;
849           i2     -= 3;
850         }
851         break;
852       case 4:
853         for (i=m-1; i>=0; i--) {
854           v  = aa + 16*ai[i];
855           vi = aj + ai[i];
856           nz = ai[i+1] - ai[i];
857           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
858           while (nz--) {
859             idx = 4*(*vi++);
860             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
861             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
862             v  += 16;
863           }
864           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
865           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
866           idiag  -= 16;
867           i2     -= 4;
868         }
869         break;
870       case 5:
871         for (i=m-1; i>=0; i--) {
872           v  = aa + 25*ai[i];
873           vi = aj + ai[i];
874           nz = ai[i+1] - ai[i];
875           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
876           while (nz--) {
877             idx = 5*(*vi++);
878             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
879             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
880             v  += 25;
881           }
882           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
883           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
884           idiag  -= 25;
885           i2     -= 5;
886         }
887         break;
888       case 6:
889         for (i=m-1; i>=0; i--) {
890           v  = aa + 36*ai[i];
891           vi = aj + ai[i];
892           nz = ai[i+1] - ai[i];
893           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
894           while (nz--) {
895             idx = 6*(*vi++);
896             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
897             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
898             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
899             v  += 36;
900           }
901           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
902           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
903           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
904           idiag  -= 36;
905           i2     -= 6;
906         }
907         break;
908       case 7:
909         for (i=m-1; i>=0; i--) {
910           v  = aa + 49*ai[i];
911           vi = aj + ai[i];
912           nz = ai[i+1] - ai[i];
913           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
914           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
915           while (nz--) {
916             idx = 7*(*vi++);
917             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
918             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
919             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
920             v  += 49;
921           }
922           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
923           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
924           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
925           idiag  -= 49;
926           i2     -= 7;
927         }
928         break;
929       default:
930         for (i=m-1; i>=0; i--) {
931           v  = aa + bs2*ai[i];
932           vi = aj + ai[i];
933           nz = ai[i+1] - ai[i];
934 
935           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
936           /* copy all rows of x that are needed into contiguous space */
937           workt = work;
938           for (j=0; j<nz; j++) {
939             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
940             workt += bs;
941           }
942           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
943           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
944 
945           idiag -= bs2;
946           i2    -= bs;
947         }
948         break;
949       }
950       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
951     }
952   }
953   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
954   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
955   PetscFunctionReturn(0);
956 }
957 
958 /*
959     Special version for direct calls from Fortran (Used in PETSc-fun3d)
960 */
961 #if defined(PETSC_HAVE_FORTRAN_CAPS)
962 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
963 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
964 #define matsetvaluesblocked4_ matsetvaluesblocked4
965 #endif
966 
967 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
968 {
969   Mat               A  = *AA;
970   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
971   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
972   PetscInt          *ai    =a->i,*ailen=a->ilen;
973   PetscInt          *aj    =a->j,stepval,lastcol = -1;
974   const PetscScalar *value = v;
975   MatScalar         *ap,*aa = a->a,*bap;
976   PetscErrorCode    ierr;
977 
978   PetscFunctionBegin;
979   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
980   stepval = (n-1)*4;
981   for (k=0; k<m; k++) { /* loop over added rows */
982     row  = im[k];
983     rp   = aj + ai[row];
984     ap   = aa + 16*ai[row];
985     nrow = ailen[row];
986     low  = 0;
987     high = nrow;
988     for (l=0; l<n; l++) { /* loop over added columns */
989       col = in[l];
990       if (col <= lastcol)  low = 0;
991       else                high = nrow;
992       lastcol = col;
993       value   = v + k*(stepval+4 + l)*4;
994       while (high-low > 7) {
995         t = (low+high)/2;
996         if (rp[t] > col) high = t;
997         else             low  = t;
998       }
999       for (i=low; i<high; i++) {
1000         if (rp[i] > col) break;
1001         if (rp[i] == col) {
1002           bap = ap +  16*i;
1003           for (ii=0; ii<4; ii++,value+=stepval) {
1004             for (jj=ii; jj<16; jj+=4) {
1005               bap[jj] += *value++;
1006             }
1007           }
1008           goto noinsert2;
1009         }
1010       }
1011       N = nrow++ - 1;
1012       high++; /* added new column index thus must search to one higher than before */
1013       /* shift up all the later entries in this row */
1014       for (ii=N; ii>=i; ii--) {
1015         rp[ii+1] = rp[ii];
1016         ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
1017       }
1018       if (N >= i) {
1019         ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1020       }
1021       rp[i] = col;
1022       bap   = ap +  16*i;
1023       for (ii=0; ii<4; ii++,value+=stepval) {
1024         for (jj=ii; jj<16; jj+=4) {
1025           bap[jj] = *value++;
1026         }
1027       }
1028       noinsert2:;
1029       low = i;
1030     }
1031     ailen[row] = nrow;
1032   }
1033   PetscFunctionReturnVoid();
1034 }
1035 
1036 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1037 #define matsetvalues4_ MATSETVALUES4
1038 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1039 #define matsetvalues4_ matsetvalues4
1040 #endif
1041 
1042 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1043 {
1044   Mat         A  = *AA;
1045   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1046   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
1047   PetscInt    *ai=a->i,*ailen=a->ilen;
1048   PetscInt    *aj=a->j,brow,bcol;
1049   PetscInt    ridx,cidx,lastcol = -1;
1050   MatScalar   *ap,value,*aa=a->a,*bap;
1051   PetscErrorCode ierr;
1052 
1053   PetscFunctionBegin;
1054   for (k=0; k<m; k++) { /* loop over added rows */
1055     row  = im[k]; brow = row/4;
1056     rp   = aj + ai[brow];
1057     ap   = aa + 16*ai[brow];
1058     nrow = ailen[brow];
1059     low  = 0;
1060     high = nrow;
1061     for (l=0; l<n; l++) { /* loop over added columns */
1062       col   = in[l]; bcol = col/4;
1063       ridx  = row % 4; cidx = col % 4;
1064       value = v[l + k*n];
1065       if (col <= lastcol)  low = 0;
1066       else                high = nrow;
1067       lastcol = col;
1068       while (high-low > 7) {
1069         t = (low+high)/2;
1070         if (rp[t] > bcol) high = t;
1071         else              low  = t;
1072       }
1073       for (i=low; i<high; i++) {
1074         if (rp[i] > bcol) break;
1075         if (rp[i] == bcol) {
1076           bap   = ap +  16*i + 4*cidx + ridx;
1077           *bap += value;
1078           goto noinsert1;
1079         }
1080       }
1081       N = nrow++ - 1;
1082       high++; /* added new column thus must search to one higher than before */
1083       /* shift up all the later entries in this row */
1084       ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
1085       ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
1086       ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1087       rp[i]                    = bcol;
1088       ap[16*i + 4*cidx + ridx] = value;
1089 noinsert1:;
1090       low = i;
1091     }
1092     ailen[brow] = nrow;
1093   }
1094   PetscFunctionReturnVoid();
1095 }
1096 
1097 /*
1098      Checks for missing diagonals
1099 */
1100 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1101 {
1102   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1103   PetscErrorCode ierr;
1104   PetscInt       *diag,*ii = a->i,i;
1105 
1106   PetscFunctionBegin;
1107   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1108   *missing = PETSC_FALSE;
1109   if (A->rmap->n > 0 && !ii) {
1110     *missing = PETSC_TRUE;
1111     if (d) *d = 0;
1112     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
1113   } else {
1114     PetscInt n;
1115     n = PetscMin(a->mbs, a->nbs);
1116     diag = a->diag;
1117     for (i=0; i<n; i++) {
1118       if (diag[i] >= ii[i+1]) {
1119         *missing = PETSC_TRUE;
1120         if (d) *d = i;
1121         ierr = PetscInfo(A,"Matrix is missing block diagonal number %" PetscInt_FMT "\n",i);CHKERRQ(ierr);
1122         break;
1123       }
1124     }
1125   }
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1130 {
1131   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1132   PetscErrorCode ierr;
1133   PetscInt       i,j,m = a->mbs;
1134 
1135   PetscFunctionBegin;
1136   if (!a->diag) {
1137     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1138     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1139     a->free_diag = PETSC_TRUE;
1140   }
1141   for (i=0; i<m; i++) {
1142     a->diag[i] = a->i[i+1];
1143     for (j=a->i[i]; j<a->i[i+1]; j++) {
1144       if (a->j[j] == i) {
1145         a->diag[i] = j;
1146         break;
1147       }
1148     }
1149   }
1150   PetscFunctionReturn(0);
1151 }
1152 
1153 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1154 {
1155   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1156   PetscErrorCode ierr;
1157   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1158   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1159 
1160   PetscFunctionBegin;
1161   *nn = n;
1162   if (!ia) PetscFunctionReturn(0);
1163   if (symmetric) {
1164     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1165     nz   = tia[n];
1166   } else {
1167     tia = a->i; tja = a->j;
1168   }
1169 
1170   if (!blockcompressed && bs > 1) {
1171     (*nn) *= bs;
1172     /* malloc & create the natural set of indices */
1173     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1174     if (n) {
1175       (*ia)[0] = oshift;
1176       for (j=1; j<bs; j++) {
1177         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1178       }
1179     }
1180 
1181     for (i=1; i<n; i++) {
1182       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1183       for (j=1; j<bs; j++) {
1184         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1185       }
1186     }
1187     if (n) {
1188       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1189     }
1190 
1191     if (inja) {
1192       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1193       cnt = 0;
1194       for (i=0; i<n; i++) {
1195         for (j=0; j<bs; j++) {
1196           for (k=tia[i]; k<tia[i+1]; k++) {
1197             for (l=0; l<bs; l++) {
1198               (*ja)[cnt++] = bs*tja[k] + l;
1199             }
1200           }
1201         }
1202       }
1203     }
1204 
1205     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1206       ierr = PetscFree(tia);CHKERRQ(ierr);
1207       ierr = PetscFree(tja);CHKERRQ(ierr);
1208     }
1209   } else if (oshift == 1) {
1210     if (symmetric) {
1211       nz = tia[A->rmap->n/bs];
1212       /*  add 1 to i and j indices */
1213       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1214       *ia = tia;
1215       if (ja) {
1216         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1217         *ja = tja;
1218       }
1219     } else {
1220       nz = a->i[A->rmap->n/bs];
1221       /* malloc space and  add 1 to i and j indices */
1222       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1223       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1224       if (ja) {
1225         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1226         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1227       }
1228     }
1229   } else {
1230     *ia = tia;
1231     if (ja) *ja = tja;
1232   }
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1237 {
1238   PetscErrorCode ierr;
1239 
1240   PetscFunctionBegin;
1241   if (!ia) PetscFunctionReturn(0);
1242   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1243     ierr = PetscFree(*ia);CHKERRQ(ierr);
1244     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1245   }
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1250 {
1251   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1252   PetscErrorCode ierr;
1253 
1254   PetscFunctionBegin;
1255 #if defined(PETSC_USE_LOG)
1256   PetscLogObjectState((PetscObject)A,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT,A->rmap->N,A->cmap->n,a->nz);
1257 #endif
1258   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1259   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1260   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1261   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1262   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1263   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1264   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1265   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1266   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1267   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1268   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1269   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1270   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1271 
1272   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1273   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1274   ierr = PetscFree(A->data);CHKERRQ(ierr);
1275 
1276   ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1288 #if defined(PETSC_HAVE_HYPRE)
1289   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
1290 #endif
1291   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1296 {
1297   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1298   PetscErrorCode ierr;
1299 
1300   PetscFunctionBegin;
1301   switch (op) {
1302   case MAT_ROW_ORIENTED:
1303     a->roworiented = flg;
1304     break;
1305   case MAT_KEEP_NONZERO_PATTERN:
1306     a->keepnonzeropattern = flg;
1307     break;
1308   case MAT_NEW_NONZERO_LOCATIONS:
1309     a->nonew = (flg ? 0 : 1);
1310     break;
1311   case MAT_NEW_NONZERO_LOCATION_ERR:
1312     a->nonew = (flg ? -1 : 0);
1313     break;
1314   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1315     a->nonew = (flg ? -2 : 0);
1316     break;
1317   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1318     a->nounused = (flg ? -1 : 0);
1319     break;
1320   case MAT_FORCE_DIAGONAL_ENTRIES:
1321   case MAT_IGNORE_OFF_PROC_ENTRIES:
1322   case MAT_USE_HASH_TABLE:
1323   case MAT_SORTED_FULL:
1324     ierr = PetscInfo(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1325     break;
1326   case MAT_SPD:
1327   case MAT_SYMMETRIC:
1328   case MAT_STRUCTURALLY_SYMMETRIC:
1329   case MAT_HERMITIAN:
1330   case MAT_SYMMETRY_ETERNAL:
1331   case MAT_SUBMAT_SINGLEIS:
1332   case MAT_STRUCTURE_ONLY:
1333     /* These options are handled directly by MatSetOption() */
1334     break;
1335   default:
1336     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1337   }
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1342 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1343 {
1344   PetscErrorCode ierr;
1345   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1346   MatScalar      *aa_i;
1347   PetscScalar    *v_i;
1348 
1349   PetscFunctionBegin;
1350   bs  = A->rmap->bs;
1351   bs2 = bs*bs;
1352   PetscCheckFalse(row < 0 || row >= A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range", row);
1353 
1354   bn  = row/bs;   /* Block number */
1355   bp  = row % bs; /* Block Position */
1356   M   = ai[bn+1] - ai[bn];
1357   *nz = bs*M;
1358 
1359   if (v) {
1360     *v = NULL;
1361     if (*nz) {
1362       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1363       for (i=0; i<M; i++) { /* for each block in the block row */
1364         v_i  = *v + i*bs;
1365         aa_i = aa + bs2*(ai[bn] + i);
1366         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1367       }
1368     }
1369   }
1370 
1371   if (idx) {
1372     *idx = NULL;
1373     if (*nz) {
1374       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1375       for (i=0; i<M; i++) { /* for each block in the block row */
1376         idx_i = *idx + i*bs;
1377         itmp  = bs*aj[ai[bn] + i];
1378         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1379       }
1380     }
1381   }
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1386 {
1387   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1388   PetscErrorCode ierr;
1389 
1390   PetscFunctionBegin;
1391   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1392   PetscFunctionReturn(0);
1393 }
1394 
1395 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1396 {
1397   PetscErrorCode ierr;
1398 
1399   PetscFunctionBegin;
1400   if (nz)  *nz = 0;
1401   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1402   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1407 {
1408   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1409   Mat            C;
1410   PetscErrorCode ierr;
1411   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1412   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1413   MatScalar      *ata,*aa=a->a;
1414 
1415   PetscFunctionBegin;
1416   ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr);
1417   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1418     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1419 
1420     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1421     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1422     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1423     ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr);
1424 
1425     at  = (Mat_SeqBAIJ*)C->data;
1426     ati = at->i;
1427     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1428   } else {
1429     C = *B;
1430     at = (Mat_SeqBAIJ*)C->data;
1431     ati = at->i;
1432   }
1433 
1434   atj = at->j;
1435   ata = at->a;
1436 
1437   /* Copy ati into atfill so we have locations of the next free space in atj */
1438   ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr);
1439 
1440   /* Walk through A row-wise and mark nonzero entries of A^T. */
1441   for (i=0; i<mbs; i++) {
1442     anzj = ai[i+1] - ai[i];
1443     for (j=0; j<anzj; j++) {
1444       atj[atfill[*aj]] = i;
1445       for (kr=0; kr<bs; kr++) {
1446         for (k=0; k<bs; k++) {
1447           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1448         }
1449       }
1450       atfill[*aj++] += 1;
1451     }
1452   }
1453   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1454   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1455 
1456   /* Clean up temporary space and complete requests. */
1457   ierr = PetscFree(atfill);CHKERRQ(ierr);
1458 
1459   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1460     ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1461     *B = C;
1462   } else {
1463     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1464   }
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1469 {
1470   PetscErrorCode ierr;
1471   Mat            Btrans;
1472 
1473   PetscFunctionBegin;
1474   *f   = PETSC_FALSE;
1475   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1476   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1477   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1482 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
1483 {
1484   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)mat->data;
1485   PetscInt       header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1486   PetscInt       *rowlens,*colidxs;
1487   PetscScalar    *matvals;
1488   PetscErrorCode ierr;
1489 
1490   PetscFunctionBegin;
1491   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1492 
1493   M  = mat->rmap->N;
1494   N  = mat->cmap->N;
1495   m  = mat->rmap->n;
1496   bs = mat->rmap->bs;
1497   nz = bs*bs*A->nz;
1498 
1499   /* write matrix header */
1500   header[0] = MAT_FILE_CLASSID;
1501   header[1] = M; header[2] = N; header[3] = nz;
1502   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1503 
1504   /* store row lengths */
1505   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1506   for (cnt=0, i=0; i<A->mbs; i++)
1507     for (j=0; j<bs; j++)
1508       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1509   ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr);
1510   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1511 
1512   /* store column indices  */
1513   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1514   for (cnt=0, i=0; i<A->mbs; i++)
1515     for (k=0; k<bs; k++)
1516       for (j=A->i[i]; j<A->i[i+1]; j++)
1517         for (l=0; l<bs; l++)
1518           colidxs[cnt++] = bs*A->j[j] + l;
1519   PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1520   ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr);
1521   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1522 
1523   /* store nonzero values */
1524   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1525   for (cnt=0, i=0; i<A->mbs; i++)
1526     for (k=0; k<bs; k++)
1527       for (j=A->i[i]; j<A->i[i+1]; j++)
1528         for (l=0; l<bs; l++)
1529           matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1530   PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1531   ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1532   ierr = PetscFree(matvals);CHKERRQ(ierr);
1533 
1534   /* write block size option to the viewer's .info file */
1535   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1540 {
1541   PetscErrorCode ierr;
1542   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1543   PetscInt       i,bs = A->rmap->bs,k;
1544 
1545   PetscFunctionBegin;
1546   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1547   for (i=0; i<a->mbs; i++) {
1548     ierr = PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT "-%" PetscInt_FMT ":",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1549     for (k=a->i[i]; k<a->i[i+1]; k++) {
1550       ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT "-%" PetscInt_FMT ") ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1551     }
1552     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1553   }
1554   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1559 {
1560   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1561   PetscErrorCode    ierr;
1562   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1563   PetscViewerFormat format;
1564 
1565   PetscFunctionBegin;
1566   if (A->structure_only) {
1567     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1568     PetscFunctionReturn(0);
1569   }
1570 
1571   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1572   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1573     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %" PetscInt_FMT "\n",bs);CHKERRQ(ierr);
1574   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1575     const char *matname;
1576     Mat        aij;
1577     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1578     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1579     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1580     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1581     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1582   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1583       PetscFunctionReturn(0);
1584   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1585     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1586     for (i=0; i<a->mbs; i++) {
1587       for (j=0; j<bs; j++) {
1588         ierr = PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j);CHKERRQ(ierr);
1589         for (k=a->i[i]; k<a->i[i+1]; k++) {
1590           for (l=0; l<bs; l++) {
1591 #if defined(PETSC_USE_COMPLEX)
1592             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1593               ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %gi) ",bs*a->j[k]+l,
1594                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1595             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1596               ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %gi) ",bs*a->j[k]+l,
1597                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1598             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1599               ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1600             }
1601 #else
1602             if (a->a[bs2*k + l*bs + j] != 0.0) {
1603               ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1604             }
1605 #endif
1606           }
1607         }
1608         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1609       }
1610     }
1611     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1612   } else {
1613     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1614     for (i=0; i<a->mbs; i++) {
1615       for (j=0; j<bs; j++) {
1616         ierr = PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j);CHKERRQ(ierr);
1617         for (k=a->i[i]; k<a->i[i+1]; k++) {
1618           for (l=0; l<bs; l++) {
1619 #if defined(PETSC_USE_COMPLEX)
1620             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1621               ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i) ",bs*a->j[k]+l,
1622                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1623             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1624               ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i) ",bs*a->j[k]+l,
1625                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1626             } else {
1627               ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1628             }
1629 #else
1630             ierr = PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1631 #endif
1632           }
1633         }
1634         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1635       }
1636     }
1637     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1638   }
1639   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1640   PetscFunctionReturn(0);
1641 }
1642 
1643 #include <petscdraw.h>
1644 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1645 {
1646   Mat               A = (Mat) Aa;
1647   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1648   PetscErrorCode    ierr;
1649   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1650   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1651   MatScalar         *aa;
1652   PetscViewer       viewer;
1653   PetscViewerFormat format;
1654 
1655   PetscFunctionBegin;
1656   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1657   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1658   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1659 
1660   /* loop over matrix elements drawing boxes */
1661 
1662   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1663     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1664     /* Blue for negative, Cyan for zero and  Red for positive */
1665     color = PETSC_DRAW_BLUE;
1666     for (i=0,row=0; i<mbs; i++,row+=bs) {
1667       for (j=a->i[i]; j<a->i[i+1]; j++) {
1668         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1669         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1670         aa  = a->a + j*bs2;
1671         for (k=0; k<bs; k++) {
1672           for (l=0; l<bs; l++) {
1673             if (PetscRealPart(*aa++) >=  0.) continue;
1674             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1675           }
1676         }
1677       }
1678     }
1679     color = PETSC_DRAW_CYAN;
1680     for (i=0,row=0; i<mbs; i++,row+=bs) {
1681       for (j=a->i[i]; j<a->i[i+1]; j++) {
1682         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1683         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1684         aa  = a->a + j*bs2;
1685         for (k=0; k<bs; k++) {
1686           for (l=0; l<bs; l++) {
1687             if (PetscRealPart(*aa++) != 0.) continue;
1688             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1689           }
1690         }
1691       }
1692     }
1693     color = PETSC_DRAW_RED;
1694     for (i=0,row=0; i<mbs; i++,row+=bs) {
1695       for (j=a->i[i]; j<a->i[i+1]; j++) {
1696         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1697         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1698         aa  = a->a + j*bs2;
1699         for (k=0; k<bs; k++) {
1700           for (l=0; l<bs; l++) {
1701             if (PetscRealPart(*aa++) <= 0.) continue;
1702             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1703           }
1704         }
1705       }
1706     }
1707     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1708   } else {
1709     /* use contour shading to indicate magnitude of values */
1710     /* first determine max of all nonzero values */
1711     PetscReal minv = 0.0, maxv = 0.0;
1712     PetscDraw popup;
1713 
1714     for (i=0; i<a->nz*a->bs2; i++) {
1715       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1716     }
1717     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1718     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1719     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1720 
1721     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1722     for (i=0,row=0; i<mbs; i++,row+=bs) {
1723       for (j=a->i[i]; j<a->i[i+1]; j++) {
1724         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1725         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1726         aa  = a->a + j*bs2;
1727         for (k=0; k<bs; k++) {
1728           for (l=0; l<bs; l++) {
1729             MatScalar v = *aa++;
1730             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1731             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1732           }
1733         }
1734       }
1735     }
1736     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1737   }
1738   PetscFunctionReturn(0);
1739 }
1740 
1741 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1742 {
1743   PetscErrorCode ierr;
1744   PetscReal      xl,yl,xr,yr,w,h;
1745   PetscDraw      draw;
1746   PetscBool      isnull;
1747 
1748   PetscFunctionBegin;
1749   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1750   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1751   if (isnull) PetscFunctionReturn(0);
1752 
1753   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1754   xr  += w;          yr += h;        xl = -w;     yl = -h;
1755   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1756   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1757   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1758   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1759   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1764 {
1765   PetscErrorCode ierr;
1766   PetscBool      iascii,isbinary,isdraw;
1767 
1768   PetscFunctionBegin;
1769   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1770   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1771   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1772   if (iascii) {
1773     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1774   } else if (isbinary) {
1775     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1776   } else if (isdraw) {
1777     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1778   } else {
1779     Mat B;
1780     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1781     ierr = MatView(B,viewer);CHKERRQ(ierr);
1782     ierr = MatDestroy(&B);CHKERRQ(ierr);
1783   }
1784   PetscFunctionReturn(0);
1785 }
1786 
1787 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1788 {
1789   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1790   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1791   PetscInt    *ai = a->i,*ailen = a->ilen;
1792   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1793   MatScalar   *ap,*aa = a->a;
1794 
1795   PetscFunctionBegin;
1796   for (k=0; k<m; k++) { /* loop over rows */
1797     row = im[k]; brow = row/bs;
1798     if (row < 0) {v += n; continue;} /* negative row */
1799     PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " too large", row);
1800     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1801     nrow = ailen[brow];
1802     for (l=0; l<n; l++) { /* loop over columns */
1803       if (in[l] < 0) {v++; continue;} /* negative column */
1804       PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %" PetscInt_FMT " too large", in[l]);
1805       col  = in[l];
1806       bcol = col/bs;
1807       cidx = col%bs;
1808       ridx = row%bs;
1809       high = nrow;
1810       low  = 0; /* assume unsorted */
1811       while (high-low > 5) {
1812         t = (low+high)/2;
1813         if (rp[t] > bcol) high = t;
1814         else             low  = t;
1815       }
1816       for (i=low; i<high; i++) {
1817         if (rp[i] > bcol) break;
1818         if (rp[i] == bcol) {
1819           *v++ = ap[bs2*i+bs*cidx+ridx];
1820           goto finished;
1821         }
1822       }
1823       *v++ = 0.0;
1824 finished:;
1825     }
1826   }
1827   PetscFunctionReturn(0);
1828 }
1829 
1830 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1831 {
1832   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1833   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1834   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1835   PetscErrorCode    ierr;
1836   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1837   PetscBool         roworiented=a->roworiented;
1838   const PetscScalar *value     = v;
1839   MatScalar         *ap=NULL,*aa = a->a,*bap;
1840 
1841   PetscFunctionBegin;
1842   if (roworiented) {
1843     stepval = (n-1)*bs;
1844   } else {
1845     stepval = (m-1)*bs;
1846   }
1847   for (k=0; k<m; k++) { /* loop over added rows */
1848     row = im[k];
1849     if (row < 0) continue;
1850     PetscCheck(row < a->mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT,row,a->mbs-1);
1851     rp   = aj + ai[row];
1852     if (!A->structure_only) ap = aa + bs2*ai[row];
1853     rmax = imax[row];
1854     nrow = ailen[row];
1855     low  = 0;
1856     high = nrow;
1857     for (l=0; l<n; l++) { /* loop over added columns */
1858       if (in[l] < 0) continue;
1859       PetscCheck(in[l] < a->nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT,in[l],a->nbs-1);
1860       col = in[l];
1861       if (!A->structure_only) {
1862         if (roworiented) {
1863           value = v + (k*(stepval+bs) + l)*bs;
1864         } else {
1865           value = v + (l*(stepval+bs) + k)*bs;
1866         }
1867       }
1868       if (col <= lastcol) low = 0;
1869       else high = nrow;
1870       lastcol = col;
1871       while (high-low > 7) {
1872         t = (low+high)/2;
1873         if (rp[t] > col) high = t;
1874         else             low  = t;
1875       }
1876       for (i=low; i<high; i++) {
1877         if (rp[i] > col) break;
1878         if (rp[i] == col) {
1879           if (A->structure_only) goto noinsert2;
1880           bap = ap +  bs2*i;
1881           if (roworiented) {
1882             if (is == ADD_VALUES) {
1883               for (ii=0; ii<bs; ii++,value+=stepval) {
1884                 for (jj=ii; jj<bs2; jj+=bs) {
1885                   bap[jj] += *value++;
1886                 }
1887               }
1888             } else {
1889               for (ii=0; ii<bs; ii++,value+=stepval) {
1890                 for (jj=ii; jj<bs2; jj+=bs) {
1891                   bap[jj] = *value++;
1892                 }
1893               }
1894             }
1895           } else {
1896             if (is == ADD_VALUES) {
1897               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1898                 for (jj=0; jj<bs; jj++) {
1899                   bap[jj] += value[jj];
1900                 }
1901                 bap += bs;
1902               }
1903             } else {
1904               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1905                 for (jj=0; jj<bs; jj++) {
1906                   bap[jj]  = value[jj];
1907                 }
1908                 bap += bs;
1909               }
1910             }
1911           }
1912           goto noinsert2;
1913         }
1914       }
1915       if (nonew == 1) goto noinsert2;
1916       PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
1917       if (A->structure_only) {
1918         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1919       } else {
1920         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1921       }
1922       N = nrow++ - 1; high++;
1923       /* shift up all the later entries in this row */
1924       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
1925       rp[i] = col;
1926       if (!A->structure_only) {
1927         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
1928         bap   = ap +  bs2*i;
1929         if (roworiented) {
1930           for (ii=0; ii<bs; ii++,value+=stepval) {
1931             for (jj=ii; jj<bs2; jj+=bs) {
1932               bap[jj] = *value++;
1933             }
1934           }
1935         } else {
1936           for (ii=0; ii<bs; ii++,value+=stepval) {
1937             for (jj=0; jj<bs; jj++) {
1938               *bap++ = *value++;
1939             }
1940           }
1941         }
1942       }
1943 noinsert2:;
1944       low = i;
1945     }
1946     ailen[row] = nrow;
1947   }
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1952 {
1953   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1954   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1955   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1956   PetscErrorCode ierr;
1957   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1958   MatScalar      *aa  = a->a,*ap;
1959   PetscReal      ratio=0.6;
1960 
1961   PetscFunctionBegin;
1962   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1963 
1964   if (m) rmax = ailen[0];
1965   for (i=1; i<mbs; i++) {
1966     /* move each row back by the amount of empty slots (fshift) before it*/
1967     fshift += imax[i-1] - ailen[i-1];
1968     rmax    = PetscMax(rmax,ailen[i]);
1969     if (fshift) {
1970       ip = aj + ai[i];
1971       ap = aa + bs2*ai[i];
1972       N  = ailen[i];
1973       ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr);
1974       if (!A->structure_only) {
1975         ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr);
1976       }
1977     }
1978     ai[i] = ai[i-1] + ailen[i-1];
1979   }
1980   if (mbs) {
1981     fshift += imax[mbs-1] - ailen[mbs-1];
1982     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1983   }
1984 
1985   /* reset ilen and imax for each row */
1986   a->nonzerorowcnt = 0;
1987   if (A->structure_only) {
1988     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1989   } else { /* !A->structure_only */
1990     for (i=0; i<mbs; i++) {
1991       ailen[i] = imax[i] = ai[i+1] - ai[i];
1992       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1993     }
1994   }
1995   a->nz = ai[mbs];
1996 
1997   /* diagonals may have moved, so kill the diagonal pointers */
1998   a->idiagvalid = PETSC_FALSE;
1999   if (fshift && a->diag) {
2000     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
2001     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2002     a->diag = NULL;
2003   }
2004   PetscCheckFalse(fshift && a->nounused == -1,PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2005   ierr = PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2006   ierr = PetscInfo(A,"Number of mallocs during MatSetValues is %" PetscInt_FMT "\n",a->reallocs);CHKERRQ(ierr);
2007   ierr = PetscInfo(A,"Most nonzeros blocks in any row is %" PetscInt_FMT "\n",rmax);CHKERRQ(ierr);
2008 
2009   A->info.mallocs    += a->reallocs;
2010   a->reallocs         = 0;
2011   A->info.nz_unneeded = (PetscReal)fshift*bs2;
2012   a->rmax             = rmax;
2013 
2014   if (!A->structure_only) {
2015     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2016   }
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 /*
2021    This function returns an array of flags which indicate the locations of contiguous
2022    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2023    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2024    Assume: sizes should be long enough to hold all the values.
2025 */
2026 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2027 {
2028   PetscInt  i,j,k,row;
2029   PetscBool flg;
2030 
2031   PetscFunctionBegin;
2032   for (i=0,j=0; i<n; j++) {
2033     row = idx[i];
2034     if (row%bs!=0) { /* Not the beginning of a block */
2035       sizes[j] = 1;
2036       i++;
2037     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2038       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2039       i++;
2040     } else { /* Begining of the block, so check if the complete block exists */
2041       flg = PETSC_TRUE;
2042       for (k=1; k<bs; k++) {
2043         if (row+k != idx[i+k]) { /* break in the block */
2044           flg = PETSC_FALSE;
2045           break;
2046         }
2047       }
2048       if (flg) { /* No break in the bs */
2049         sizes[j] = bs;
2050         i       += bs;
2051       } else {
2052         sizes[j] = 1;
2053         i++;
2054       }
2055     }
2056   }
2057   *bs_max = j;
2058   PetscFunctionReturn(0);
2059 }
2060 
2061 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2062 {
2063   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2064   PetscErrorCode    ierr;
2065   PetscInt          i,j,k,count,*rows;
2066   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2067   PetscScalar       zero = 0.0;
2068   MatScalar         *aa;
2069   const PetscScalar *xx;
2070   PetscScalar       *bb;
2071 
2072   PetscFunctionBegin;
2073   /* fix right hand side if needed */
2074   if (x && b) {
2075     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2076     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2077     for (i=0; i<is_n; i++) {
2078       bb[is_idx[i]] = diag*xx[is_idx[i]];
2079     }
2080     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2081     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2082   }
2083 
2084   /* Make a copy of the IS and  sort it */
2085   /* allocate memory for rows,sizes */
2086   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2087 
2088   /* copy IS values to rows, and sort them */
2089   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2090   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2091 
2092   if (baij->keepnonzeropattern) {
2093     for (i=0; i<is_n; i++) sizes[i] = 1;
2094     bs_max          = is_n;
2095   } else {
2096     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2097     A->nonzerostate++;
2098   }
2099 
2100   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2101     row = rows[j];
2102     PetscCheckFalse(row < 0 || row > A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",row);
2103     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2104     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2105     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2106       if (diag != (PetscScalar)0.0) {
2107         if (baij->ilen[row/bs] > 0) {
2108           baij->ilen[row/bs]       = 1;
2109           baij->j[baij->i[row/bs]] = row/bs;
2110 
2111           ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr);
2112         }
2113         /* Now insert all the diagonal values for this bs */
2114         for (k=0; k<bs; k++) {
2115           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2116         }
2117       } else { /* (diag == 0.0) */
2118         baij->ilen[row/bs] = 0;
2119       } /* end (diag == 0.0) */
2120     } else { /* (sizes[i] != bs) */
2121       PetscAssert(sizes[i] == 1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2122       for (k=0; k<count; k++) {
2123         aa[0] =  zero;
2124         aa   += bs;
2125       }
2126       if (diag != (PetscScalar)0.0) {
2127         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2128       }
2129     }
2130   }
2131 
2132   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2133   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2134   PetscFunctionReturn(0);
2135 }
2136 
2137 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2138 {
2139   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2140   PetscErrorCode    ierr;
2141   PetscInt          i,j,k,count;
2142   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2143   PetscScalar       zero = 0.0;
2144   MatScalar         *aa;
2145   const PetscScalar *xx;
2146   PetscScalar       *bb;
2147   PetscBool         *zeroed,vecs = PETSC_FALSE;
2148 
2149   PetscFunctionBegin;
2150   /* fix right hand side if needed */
2151   if (x && b) {
2152     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2153     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2154     vecs = PETSC_TRUE;
2155   }
2156 
2157   /* zero the columns */
2158   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2159   for (i=0; i<is_n; i++) {
2160     PetscCheckFalse(is_idx[i] < 0 || is_idx[i] >= A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",is_idx[i]);
2161     zeroed[is_idx[i]] = PETSC_TRUE;
2162   }
2163   for (i=0; i<A->rmap->N; i++) {
2164     if (!zeroed[i]) {
2165       row = i/bs;
2166       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2167         for (k=0; k<bs; k++) {
2168           col = bs*baij->j[j] + k;
2169           if (zeroed[col]) {
2170             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2171             if (vecs) bb[i] -= aa[0]*xx[col];
2172             aa[0] = 0.0;
2173           }
2174         }
2175       }
2176     } else if (vecs) bb[i] = diag*xx[i];
2177   }
2178   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2179   if (vecs) {
2180     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2181     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2182   }
2183 
2184   /* zero the rows */
2185   for (i=0; i<is_n; i++) {
2186     row   = is_idx[i];
2187     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2188     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2189     for (k=0; k<count; k++) {
2190       aa[0] =  zero;
2191       aa   += bs;
2192     }
2193     if (diag != (PetscScalar)0.0) {
2194       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2195     }
2196   }
2197   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2198   PetscFunctionReturn(0);
2199 }
2200 
2201 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2202 {
2203   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2204   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2205   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2206   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2207   PetscErrorCode ierr;
2208   PetscInt       ridx,cidx,bs2=a->bs2;
2209   PetscBool      roworiented=a->roworiented;
2210   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2211 
2212   PetscFunctionBegin;
2213   for (k=0; k<m; k++) { /* loop over added rows */
2214     row  = im[k];
2215     brow = row/bs;
2216     if (row < 0) continue;
2217     PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->N-1);
2218     rp   = aj + ai[brow];
2219     if (!A->structure_only) ap = aa + bs2*ai[brow];
2220     rmax = imax[brow];
2221     nrow = ailen[brow];
2222     low  = 0;
2223     high = nrow;
2224     for (l=0; l<n; l++) { /* loop over added columns */
2225       if (in[l] < 0) continue;
2226       PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[l],A->cmap->n-1);
2227       col  = in[l]; bcol = col/bs;
2228       ridx = row % bs; cidx = col % bs;
2229       if (!A->structure_only) {
2230         if (roworiented) {
2231           value = v[l + k*n];
2232         } else {
2233           value = v[k + l*m];
2234         }
2235       }
2236       if (col <= lastcol) low = 0; else high = nrow;
2237       lastcol = col;
2238       while (high-low > 7) {
2239         t = (low+high)/2;
2240         if (rp[t] > bcol) high = t;
2241         else              low  = t;
2242       }
2243       for (i=low; i<high; i++) {
2244         if (rp[i] > bcol) break;
2245         if (rp[i] == bcol) {
2246           bap = ap +  bs2*i + bs*cidx + ridx;
2247           if (!A->structure_only) {
2248             if (is == ADD_VALUES) *bap += value;
2249             else                  *bap  = value;
2250           }
2251           goto noinsert1;
2252         }
2253       }
2254       if (nonew == 1) goto noinsert1;
2255       PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2256       if (A->structure_only) {
2257         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2258       } else {
2259         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2260       }
2261       N = nrow++ - 1; high++;
2262       /* shift up all the later entries in this row */
2263       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
2264       rp[i] = bcol;
2265       if (!A->structure_only) {
2266         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
2267         ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr);
2268         ap[bs2*i + bs*cidx + ridx] = value;
2269       }
2270       a->nz++;
2271       A->nonzerostate++;
2272 noinsert1:;
2273       low = i;
2274     }
2275     ailen[brow] = nrow;
2276   }
2277   PetscFunctionReturn(0);
2278 }
2279 
2280 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2281 {
2282   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2283   Mat            outA;
2284   PetscErrorCode ierr;
2285   PetscBool      row_identity,col_identity;
2286 
2287   PetscFunctionBegin;
2288   PetscCheckFalse(info->levels != 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2289   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2290   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2291   PetscCheckFalse(!row_identity || !col_identity,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2292 
2293   outA            = inA;
2294   inA->factortype = MAT_FACTOR_LU;
2295   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2296   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2297 
2298   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2299 
2300   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2301   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2302   a->row = row;
2303   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2304   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2305   a->col = col;
2306 
2307   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2308   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2309   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2310   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2311 
2312   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2313   if (!a->solve_work) {
2314     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2315     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2316   }
2317   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2322 {
2323   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2324   PetscInt    i,nz,mbs;
2325 
2326   PetscFunctionBegin;
2327   nz  = baij->maxnz;
2328   mbs = baij->mbs;
2329   for (i=0; i<nz; i++) {
2330     baij->j[i] = indices[i];
2331   }
2332   baij->nz = nz;
2333   for (i=0; i<mbs; i++) {
2334     baij->ilen[i] = baij->imax[i];
2335   }
2336   PetscFunctionReturn(0);
2337 }
2338 
2339 /*@
2340     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2341        in the matrix.
2342 
2343   Input Parameters:
2344 +  mat - the SeqBAIJ matrix
2345 -  indices - the column indices
2346 
2347   Level: advanced
2348 
2349   Notes:
2350     This can be called if you have precomputed the nonzero structure of the
2351   matrix and want to provide it to the matrix object to improve the performance
2352   of the MatSetValues() operation.
2353 
2354     You MUST have set the correct numbers of nonzeros per row in the call to
2355   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2356 
2357     MUST be called before any calls to MatSetValues();
2358 
2359 @*/
2360 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2361 {
2362   PetscErrorCode ierr;
2363 
2364   PetscFunctionBegin;
2365   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2366   PetscValidPointer(indices,2);
2367   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2368   PetscFunctionReturn(0);
2369 }
2370 
2371 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2372 {
2373   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2374   PetscErrorCode ierr;
2375   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2376   PetscReal      atmp;
2377   PetscScalar    *x,zero = 0.0;
2378   MatScalar      *aa;
2379   PetscInt       ncols,brow,krow,kcol;
2380 
2381   PetscFunctionBegin;
2382   PetscCheckFalse(A->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2383   bs  = A->rmap->bs;
2384   aa  = a->a;
2385   ai  = a->i;
2386   aj  = a->j;
2387   mbs = a->mbs;
2388 
2389   ierr = VecSet(v,zero);CHKERRQ(ierr);
2390   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2391   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2392   PetscCheckFalse(n != A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2393   for (i=0; i<mbs; i++) {
2394     ncols = ai[1] - ai[0]; ai++;
2395     brow  = bs*i;
2396     for (j=0; j<ncols; j++) {
2397       for (kcol=0; kcol<bs; kcol++) {
2398         for (krow=0; krow<bs; krow++) {
2399           atmp = PetscAbsScalar(*aa);aa++;
2400           row  = brow + krow;   /* row index */
2401           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2402         }
2403       }
2404       aj++;
2405     }
2406   }
2407   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2408   PetscFunctionReturn(0);
2409 }
2410 
2411 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2412 {
2413   PetscErrorCode ierr;
2414 
2415   PetscFunctionBegin;
2416   /* If the two matrices have the same copy implementation, use fast copy. */
2417   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2418     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2419     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2420     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2421 
2422     PetscCheckFalse(a->i[ambs] != b->i[bmbs],PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",a->i[ambs],b->i[bmbs]);
2423     PetscCheckFalse(abs != bbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",abs,bbs);
2424     ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr);
2425     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2426   } else {
2427     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2428   }
2429   PetscFunctionReturn(0);
2430 }
2431 
2432 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2433 {
2434   PetscErrorCode ierr;
2435 
2436   PetscFunctionBegin;
2437   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2438   PetscFunctionReturn(0);
2439 }
2440 
2441 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2442 {
2443   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2444 
2445   PetscFunctionBegin;
2446   *array = a->a;
2447   PetscFunctionReturn(0);
2448 }
2449 
2450 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2451 {
2452   PetscFunctionBegin;
2453   *array = NULL;
2454   PetscFunctionReturn(0);
2455 }
2456 
2457 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2458 {
2459   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2460   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2461   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2462   PetscErrorCode ierr;
2463 
2464   PetscFunctionBegin;
2465   /* Set the number of nonzeros in the new matrix */
2466   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2471 {
2472   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2473   PetscErrorCode ierr;
2474   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2475   PetscBLASInt   one=1;
2476 
2477   PetscFunctionBegin;
2478   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2479     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2480     if (e) {
2481       ierr = PetscArraycmp(x->i,y->i,x->mbs+1,&e);CHKERRQ(ierr);
2482       if (e) {
2483         ierr = PetscArraycmp(x->j,y->j,x->i[x->mbs],&e);CHKERRQ(ierr);
2484         if (e) str = SAME_NONZERO_PATTERN;
2485       }
2486     }
2487     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"MatStructure is not SAME_NONZERO_PATTERN");
2488   }
2489   if (str == SAME_NONZERO_PATTERN) {
2490     PetscScalar  alpha = a;
2491     PetscBLASInt bnz;
2492     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2493     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2494     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2495   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2496     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2497   } else {
2498     Mat      B;
2499     PetscInt *nnz;
2500     PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2501     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2502     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2503     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2504     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2505     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2506     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2507     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2508     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2509     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2510     ierr = MatHeaderMerge(Y,&B);CHKERRQ(ierr);
2511     ierr = PetscFree(nnz);CHKERRQ(ierr);
2512   }
2513   PetscFunctionReturn(0);
2514 }
2515 
2516 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2517 {
2518 #if defined(PETSC_USE_COMPLEX)
2519   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2520   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2521   MatScalar   *aa = a->a;
2522 
2523   PetscFunctionBegin;
2524   for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]);
2525 #else
2526   PetscFunctionBegin;
2527 #endif
2528   PetscFunctionReturn(0);
2529 }
2530 
2531 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2532 {
2533   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2534   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2535   MatScalar   *aa = a->a;
2536 
2537   PetscFunctionBegin;
2538   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2539   PetscFunctionReturn(0);
2540 }
2541 
2542 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2543 {
2544   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2545   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2546   MatScalar   *aa = a->a;
2547 
2548   PetscFunctionBegin;
2549   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2550   PetscFunctionReturn(0);
2551 }
2552 
2553 /*
2554     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2555 */
2556 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2557 {
2558   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2559   PetscErrorCode ierr;
2560   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2561   PetscInt       nz = a->i[m],row,*jj,mr,col;
2562 
2563   PetscFunctionBegin;
2564   *nn = n;
2565   if (!ia) PetscFunctionReturn(0);
2566   PetscCheckFalse(symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2567   else {
2568     ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2569     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2570     ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2571     jj   = a->j;
2572     for (i=0; i<nz; i++) {
2573       collengths[jj[i]]++;
2574     }
2575     cia[0] = oshift;
2576     for (i=0; i<n; i++) {
2577       cia[i+1] = cia[i] + collengths[i];
2578     }
2579     ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2580     jj   = a->j;
2581     for (row=0; row<m; row++) {
2582       mr = a->i[row+1] - a->i[row];
2583       for (i=0; i<mr; i++) {
2584         col = *jj++;
2585 
2586         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2587       }
2588     }
2589     ierr = PetscFree(collengths);CHKERRQ(ierr);
2590     *ia  = cia; *ja = cja;
2591   }
2592   PetscFunctionReturn(0);
2593 }
2594 
2595 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2596 {
2597   PetscErrorCode ierr;
2598 
2599   PetscFunctionBegin;
2600   if (!ia) PetscFunctionReturn(0);
2601   ierr = PetscFree(*ia);CHKERRQ(ierr);
2602   ierr = PetscFree(*ja);CHKERRQ(ierr);
2603   PetscFunctionReturn(0);
2604 }
2605 
2606 /*
2607  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2608  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2609  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2610  */
2611 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2612 {
2613   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2614   PetscErrorCode ierr;
2615   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2616   PetscInt       nz = a->i[m],row,*jj,mr,col;
2617   PetscInt       *cspidx;
2618 
2619   PetscFunctionBegin;
2620   *nn = n;
2621   if (!ia) PetscFunctionReturn(0);
2622 
2623   ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2624   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2625   ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2626   ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr);
2627   jj   = a->j;
2628   for (i=0; i<nz; i++) {
2629     collengths[jj[i]]++;
2630   }
2631   cia[0] = oshift;
2632   for (i=0; i<n; i++) {
2633     cia[i+1] = cia[i] + collengths[i];
2634   }
2635   ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2636   jj   = a->j;
2637   for (row=0; row<m; row++) {
2638     mr = a->i[row+1] - a->i[row];
2639     for (i=0; i<mr; i++) {
2640       col = *jj++;
2641       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2642       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2643     }
2644   }
2645   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2646   *ia    = cia;
2647   *ja    = cja;
2648   *spidx = cspidx;
2649   PetscFunctionReturn(0);
2650 }
2651 
2652 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2653 {
2654   PetscErrorCode ierr;
2655 
2656   PetscFunctionBegin;
2657   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2658   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2659   PetscFunctionReturn(0);
2660 }
2661 
2662 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2663 {
2664   PetscErrorCode ierr;
2665   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2666 
2667   PetscFunctionBegin;
2668   if (!Y->preallocated || !aij->nz) {
2669     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2670   }
2671   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2672   PetscFunctionReturn(0);
2673 }
2674 
2675 /* -------------------------------------------------------------------*/
2676 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2677                                        MatGetRow_SeqBAIJ,
2678                                        MatRestoreRow_SeqBAIJ,
2679                                        MatMult_SeqBAIJ_N,
2680                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2681                                        MatMultTranspose_SeqBAIJ,
2682                                        MatMultTransposeAdd_SeqBAIJ,
2683                                        NULL,
2684                                        NULL,
2685                                        NULL,
2686                                /* 10*/ NULL,
2687                                        MatLUFactor_SeqBAIJ,
2688                                        NULL,
2689                                        NULL,
2690                                        MatTranspose_SeqBAIJ,
2691                                /* 15*/ MatGetInfo_SeqBAIJ,
2692                                        MatEqual_SeqBAIJ,
2693                                        MatGetDiagonal_SeqBAIJ,
2694                                        MatDiagonalScale_SeqBAIJ,
2695                                        MatNorm_SeqBAIJ,
2696                                /* 20*/ NULL,
2697                                        MatAssemblyEnd_SeqBAIJ,
2698                                        MatSetOption_SeqBAIJ,
2699                                        MatZeroEntries_SeqBAIJ,
2700                                /* 24*/ MatZeroRows_SeqBAIJ,
2701                                        NULL,
2702                                        NULL,
2703                                        NULL,
2704                                        NULL,
2705                                /* 29*/ MatSetUp_SeqBAIJ,
2706                                        NULL,
2707                                        NULL,
2708                                        NULL,
2709                                        NULL,
2710                                /* 34*/ MatDuplicate_SeqBAIJ,
2711                                        NULL,
2712                                        NULL,
2713                                        MatILUFactor_SeqBAIJ,
2714                                        NULL,
2715                                /* 39*/ MatAXPY_SeqBAIJ,
2716                                        MatCreateSubMatrices_SeqBAIJ,
2717                                        MatIncreaseOverlap_SeqBAIJ,
2718                                        MatGetValues_SeqBAIJ,
2719                                        MatCopy_SeqBAIJ,
2720                                /* 44*/ NULL,
2721                                        MatScale_SeqBAIJ,
2722                                        MatShift_SeqBAIJ,
2723                                        NULL,
2724                                        MatZeroRowsColumns_SeqBAIJ,
2725                                /* 49*/ NULL,
2726                                        MatGetRowIJ_SeqBAIJ,
2727                                        MatRestoreRowIJ_SeqBAIJ,
2728                                        MatGetColumnIJ_SeqBAIJ,
2729                                        MatRestoreColumnIJ_SeqBAIJ,
2730                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2731                                        NULL,
2732                                        NULL,
2733                                        NULL,
2734                                        MatSetValuesBlocked_SeqBAIJ,
2735                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2736                                        MatDestroy_SeqBAIJ,
2737                                        MatView_SeqBAIJ,
2738                                        NULL,
2739                                        NULL,
2740                                /* 64*/ NULL,
2741                                        NULL,
2742                                        NULL,
2743                                        NULL,
2744                                        NULL,
2745                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2746                                        NULL,
2747                                        MatConvert_Basic,
2748                                        NULL,
2749                                        NULL,
2750                                /* 74*/ NULL,
2751                                        MatFDColoringApply_BAIJ,
2752                                        NULL,
2753                                        NULL,
2754                                        NULL,
2755                                /* 79*/ NULL,
2756                                        NULL,
2757                                        NULL,
2758                                        NULL,
2759                                        MatLoad_SeqBAIJ,
2760                                /* 84*/ NULL,
2761                                        NULL,
2762                                        NULL,
2763                                        NULL,
2764                                        NULL,
2765                                /* 89*/ NULL,
2766                                        NULL,
2767                                        NULL,
2768                                        NULL,
2769                                        NULL,
2770                                /* 94*/ NULL,
2771                                        NULL,
2772                                        NULL,
2773                                        NULL,
2774                                        NULL,
2775                                /* 99*/ NULL,
2776                                        NULL,
2777                                        NULL,
2778                                        MatConjugate_SeqBAIJ,
2779                                        NULL,
2780                                /*104*/ NULL,
2781                                        MatRealPart_SeqBAIJ,
2782                                        MatImaginaryPart_SeqBAIJ,
2783                                        NULL,
2784                                        NULL,
2785                                /*109*/ NULL,
2786                                        NULL,
2787                                        NULL,
2788                                        NULL,
2789                                        MatMissingDiagonal_SeqBAIJ,
2790                                /*114*/ NULL,
2791                                        NULL,
2792                                        NULL,
2793                                        NULL,
2794                                        NULL,
2795                                /*119*/ NULL,
2796                                        NULL,
2797                                        MatMultHermitianTranspose_SeqBAIJ,
2798                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2799                                        NULL,
2800                                /*124*/ NULL,
2801                                        MatGetColumnReductions_SeqBAIJ,
2802                                        MatInvertBlockDiagonal_SeqBAIJ,
2803                                        NULL,
2804                                        NULL,
2805                                /*129*/ NULL,
2806                                        NULL,
2807                                        NULL,
2808                                        NULL,
2809                                        NULL,
2810                                /*134*/ NULL,
2811                                        NULL,
2812                                        NULL,
2813                                        NULL,
2814                                        NULL,
2815                                /*139*/ MatSetBlockSizes_Default,
2816                                        NULL,
2817                                        NULL,
2818                                        MatFDColoringSetUp_SeqXAIJ,
2819                                        NULL,
2820                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2821                                        MatDestroySubMatrices_SeqBAIJ,
2822                                        NULL,
2823                                        NULL
2824 };
2825 
2826 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2827 {
2828   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2829   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2830   PetscErrorCode ierr;
2831 
2832   PetscFunctionBegin;
2833   PetscCheckFalse(aij->nonew != 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2834 
2835   /* allocate space for values if not already there */
2836   if (!aij->saved_values) {
2837     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2838     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2839   }
2840 
2841   /* copy values over */
2842   ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr);
2843   PetscFunctionReturn(0);
2844 }
2845 
2846 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2847 {
2848   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2849   PetscErrorCode ierr;
2850   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2851 
2852   PetscFunctionBegin;
2853   PetscCheckFalse(aij->nonew != 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2854   PetscCheckFalse(!aij->saved_values,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2855 
2856   /* copy values over */
2857   ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr);
2858   PetscFunctionReturn(0);
2859 }
2860 
2861 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2862 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2863 
2864 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2865 {
2866   Mat_SeqBAIJ    *b;
2867   PetscErrorCode ierr;
2868   PetscInt       i,mbs,nbs,bs2;
2869   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2870 
2871   PetscFunctionBegin;
2872   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2873   if (nz == MAT_SKIP_ALLOCATION) {
2874     skipallocation = PETSC_TRUE;
2875     nz             = 0;
2876   }
2877 
2878   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2879   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2880   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2881   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2882 
2883   B->preallocated = PETSC_TRUE;
2884 
2885   mbs = B->rmap->n/bs;
2886   nbs = B->cmap->n/bs;
2887   bs2 = bs*bs;
2888 
2889   PetscCheckFalse(mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->N,B->cmap->n,bs);
2890 
2891   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2892   PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %" PetscInt_FMT,nz);
2893   if (nnz) {
2894     for (i=0; i<mbs; i++) {
2895       PetscCheckFalse(nnz[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,nnz[i]);
2896       PetscCheckFalse(nnz[i] > nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,nnz[i],nbs);
2897     }
2898   }
2899 
2900   b    = (Mat_SeqBAIJ*)B->data;
2901   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2902   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2903   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2904 
2905   if (!flg) {
2906     switch (bs) {
2907     case 1:
2908       B->ops->mult    = MatMult_SeqBAIJ_1;
2909       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2910       break;
2911     case 2:
2912       B->ops->mult    = MatMult_SeqBAIJ_2;
2913       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2914       break;
2915     case 3:
2916       B->ops->mult    = MatMult_SeqBAIJ_3;
2917       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2918       break;
2919     case 4:
2920       B->ops->mult    = MatMult_SeqBAIJ_4;
2921       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2922       break;
2923     case 5:
2924       B->ops->mult    = MatMult_SeqBAIJ_5;
2925       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2926       break;
2927     case 6:
2928       B->ops->mult    = MatMult_SeqBAIJ_6;
2929       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2930       break;
2931     case 7:
2932       B->ops->mult    = MatMult_SeqBAIJ_7;
2933       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2934       break;
2935     case 9:
2936     {
2937       PetscInt version = 1;
2938       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
2939       switch (version) {
2940 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2941       case 1:
2942         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2943         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2944         ierr = PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);CHKERRQ(ierr);
2945         break;
2946 #endif
2947       default:
2948         B->ops->mult    = MatMult_SeqBAIJ_N;
2949         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2950         ierr = PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);CHKERRQ(ierr);
2951         break;
2952       }
2953       break;
2954     }
2955     case 11:
2956       B->ops->mult    = MatMult_SeqBAIJ_11;
2957       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2958       break;
2959     case 12:
2960     {
2961       PetscInt version = 1;
2962       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
2963       switch (version) {
2964       case 1:
2965         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
2966         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2967         ierr = PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);CHKERRQ(ierr);
2968         break;
2969       case 2:
2970         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
2971         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
2972         ierr = PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);CHKERRQ(ierr);
2973         break;
2974 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2975       case 3:
2976         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
2977         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2978         ierr = PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);CHKERRQ(ierr);
2979         break;
2980 #endif
2981       default:
2982         B->ops->mult    = MatMult_SeqBAIJ_N;
2983         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2984         ierr = PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);CHKERRQ(ierr);
2985         break;
2986       }
2987       break;
2988     }
2989     case 15:
2990     {
2991       PetscInt version = 1;
2992       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
2993       switch (version) {
2994       case 1:
2995         B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2996         ierr = PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);CHKERRQ(ierr);
2997         break;
2998       case 2:
2999         B->ops->mult    = MatMult_SeqBAIJ_15_ver2;
3000         ierr = PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);CHKERRQ(ierr);
3001         break;
3002       case 3:
3003         B->ops->mult    = MatMult_SeqBAIJ_15_ver3;
3004         ierr = PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);CHKERRQ(ierr);
3005         break;
3006       case 4:
3007         B->ops->mult    = MatMult_SeqBAIJ_15_ver4;
3008         ierr = PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs);CHKERRQ(ierr);
3009         break;
3010       default:
3011         B->ops->mult    = MatMult_SeqBAIJ_N;
3012         ierr = PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);CHKERRQ(ierr);
3013         break;
3014       }
3015       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3016       break;
3017     }
3018     default:
3019       B->ops->mult    = MatMult_SeqBAIJ_N;
3020       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3021       ierr = PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs);CHKERRQ(ierr);
3022       break;
3023     }
3024   }
3025   B->ops->sor = MatSOR_SeqBAIJ;
3026   b->mbs = mbs;
3027   b->nbs = nbs;
3028   if (!skipallocation) {
3029     if (!b->imax) {
3030       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
3031       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3032 
3033       b->free_imax_ilen = PETSC_TRUE;
3034     }
3035     /* b->ilen will count nonzeros in each block row so far. */
3036     for (i=0; i<mbs; i++) b->ilen[i] = 0;
3037     if (!nnz) {
3038       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3039       else if (nz < 0) nz = 1;
3040       nz = PetscMin(nz,nbs);
3041       for (i=0; i<mbs; i++) b->imax[i] = nz;
3042       ierr = PetscIntMultError(nz,mbs,&nz);CHKERRQ(ierr);
3043     } else {
3044       PetscInt64 nz64 = 0;
3045       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
3046       ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr);
3047     }
3048 
3049     /* allocate the matrix space */
3050     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3051     if (B->structure_only) {
3052       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
3053       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
3054       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
3055     } else {
3056       PetscInt nzbs2 = 0;
3057       ierr = PetscIntMultError(nz,bs2,&nzbs2);CHKERRQ(ierr);
3058       ierr = PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
3059       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3060       ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr);
3061     }
3062     ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr);
3063 
3064     if (B->structure_only) {
3065       b->singlemalloc = PETSC_FALSE;
3066       b->free_a       = PETSC_FALSE;
3067     } else {
3068       b->singlemalloc = PETSC_TRUE;
3069       b->free_a       = PETSC_TRUE;
3070     }
3071     b->free_ij = PETSC_TRUE;
3072 
3073     b->i[0] = 0;
3074     for (i=1; i<mbs+1; i++) {
3075       b->i[i] = b->i[i-1] + b->imax[i-1];
3076     }
3077 
3078   } else {
3079     b->free_a  = PETSC_FALSE;
3080     b->free_ij = PETSC_FALSE;
3081   }
3082 
3083   b->bs2              = bs2;
3084   b->mbs              = mbs;
3085   b->nz               = 0;
3086   b->maxnz            = nz;
3087   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3088   B->was_assembled    = PETSC_FALSE;
3089   B->assembled        = PETSC_FALSE;
3090   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3091   PetscFunctionReturn(0);
3092 }
3093 
3094 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3095 {
3096   PetscInt       i,m,nz,nz_max=0,*nnz;
3097   PetscScalar    *values=NULL;
3098   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
3099   PetscErrorCode ierr;
3100 
3101   PetscFunctionBegin;
3102   PetscCheckFalse(bs < 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs);
3103   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3104   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3105   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3106   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3107   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3108   m    = B->rmap->n/bs;
3109 
3110   PetscCheckFalse(ii[0] != 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]);
3111   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
3112   for (i=0; i<m; i++) {
3113     nz = ii[i+1]- ii[i];
3114     PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz);
3115     nz_max = PetscMax(nz_max, nz);
3116     nnz[i] = nz;
3117   }
3118   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3119   ierr = PetscFree(nnz);CHKERRQ(ierr);
3120 
3121   values = (PetscScalar*)V;
3122   if (!values) {
3123     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
3124   }
3125   for (i=0; i<m; i++) {
3126     PetscInt          ncols  = ii[i+1] - ii[i];
3127     const PetscInt    *icols = jj + ii[i];
3128     if (bs == 1 || !roworiented) {
3129       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3130       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3131     } else {
3132       PetscInt j;
3133       for (j=0; j<ncols; j++) {
3134         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3135         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
3136       }
3137     }
3138   }
3139   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3140   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3141   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3142   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3143   PetscFunctionReturn(0);
3144 }
3145 
3146 /*@C
3147    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
3148 
3149    Not Collective
3150 
3151    Input Parameter:
3152 .  mat - a MATSEQBAIJ matrix
3153 
3154    Output Parameter:
3155 .   array - pointer to the data
3156 
3157    Level: intermediate
3158 
3159 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3160 @*/
3161 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3162 {
3163   PetscErrorCode ierr;
3164 
3165   PetscFunctionBegin;
3166   ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3167   PetscFunctionReturn(0);
3168 }
3169 
3170 /*@C
3171    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3172 
3173    Not Collective
3174 
3175    Input Parameters:
3176 +  mat - a MATSEQBAIJ matrix
3177 -  array - pointer to the data
3178 
3179    Level: intermediate
3180 
3181 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3182 @*/
3183 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3184 {
3185   PetscErrorCode ierr;
3186 
3187   PetscFunctionBegin;
3188   ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3189   PetscFunctionReturn(0);
3190 }
3191 
3192 /*MC
3193    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3194    block sparse compressed row format.
3195 
3196    Options Database Keys:
3197 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3198 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3199 
3200    Level: beginner
3201 
3202    Notes:
3203     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3204     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
3205 
3206    Run with -info to see what version of the matrix-vector product is being used
3207 
3208 .seealso: MatCreateSeqBAIJ()
3209 M*/
3210 
3211 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3212 
3213 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3214 {
3215   PetscErrorCode ierr;
3216   PetscMPIInt    size;
3217   Mat_SeqBAIJ    *b;
3218 
3219   PetscFunctionBegin;
3220   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
3221   PetscCheckFalse(size > 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3222 
3223   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3224   B->data = (void*)b;
3225   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3226 
3227   b->row          = NULL;
3228   b->col          = NULL;
3229   b->icol         = NULL;
3230   b->reallocs     = 0;
3231   b->saved_values = NULL;
3232 
3233   b->roworiented        = PETSC_TRUE;
3234   b->nonew              = 0;
3235   b->diag               = NULL;
3236   B->spptr              = NULL;
3237   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3238   b->keepnonzeropattern = PETSC_FALSE;
3239 
3240   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr);
3241   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr);
3242   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3243   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3244   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3245   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3246   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3247   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3248   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3249   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3250 #if defined(PETSC_HAVE_HYPRE)
3251   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3252 #endif
3253   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3254   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3255   PetscFunctionReturn(0);
3256 }
3257 
3258 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3259 {
3260   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3261   PetscErrorCode ierr;
3262   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3263 
3264   PetscFunctionBegin;
3265   PetscCheckFalse(a->i[mbs] != nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3266 
3267   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3268     c->imax           = a->imax;
3269     c->ilen           = a->ilen;
3270     c->free_imax_ilen = PETSC_FALSE;
3271   } else {
3272     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3273     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3274     for (i=0; i<mbs; i++) {
3275       c->imax[i] = a->imax[i];
3276       c->ilen[i] = a->ilen[i];
3277     }
3278     c->free_imax_ilen = PETSC_TRUE;
3279   }
3280 
3281   /* allocate the matrix space */
3282   if (mallocmatspace) {
3283     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3284       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3285       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3286 
3287       c->i            = a->i;
3288       c->j            = a->j;
3289       c->singlemalloc = PETSC_FALSE;
3290       c->free_a       = PETSC_TRUE;
3291       c->free_ij      = PETSC_FALSE;
3292       c->parent       = A;
3293       C->preallocated = PETSC_TRUE;
3294       C->assembled    = PETSC_TRUE;
3295 
3296       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3297       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3298       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3299     } else {
3300       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3301       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3302 
3303       c->singlemalloc = PETSC_TRUE;
3304       c->free_a       = PETSC_TRUE;
3305       c->free_ij      = PETSC_TRUE;
3306 
3307       ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr);
3308       if (mbs > 0) {
3309         ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr);
3310         if (cpvalues == MAT_COPY_VALUES) {
3311           ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr);
3312         } else {
3313           ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr);
3314         }
3315       }
3316       C->preallocated = PETSC_TRUE;
3317       C->assembled    = PETSC_TRUE;
3318     }
3319   }
3320 
3321   c->roworiented = a->roworiented;
3322   c->nonew       = a->nonew;
3323 
3324   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3325   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3326 
3327   c->bs2         = a->bs2;
3328   c->mbs         = a->mbs;
3329   c->nbs         = a->nbs;
3330 
3331   if (a->diag) {
3332     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3333       c->diag      = a->diag;
3334       c->free_diag = PETSC_FALSE;
3335     } else {
3336       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3337       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3338       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3339       c->free_diag = PETSC_TRUE;
3340     }
3341   } else c->diag = NULL;
3342 
3343   c->nz         = a->nz;
3344   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3345   c->solve_work = NULL;
3346   c->mult_work  = NULL;
3347   c->sor_workt  = NULL;
3348   c->sor_work   = NULL;
3349 
3350   c->compressedrow.use   = a->compressedrow.use;
3351   c->compressedrow.nrows = a->compressedrow.nrows;
3352   if (a->compressedrow.use) {
3353     i    = a->compressedrow.nrows;
3354     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3355     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3356     ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr);
3357     ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr);
3358   } else {
3359     c->compressedrow.use    = PETSC_FALSE;
3360     c->compressedrow.i      = NULL;
3361     c->compressedrow.rindex = NULL;
3362   }
3363   C->nonzerostate = A->nonzerostate;
3364 
3365   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3366   PetscFunctionReturn(0);
3367 }
3368 
3369 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3370 {
3371   PetscErrorCode ierr;
3372 
3373   PetscFunctionBegin;
3374   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3375   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3376   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3377   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3378   PetscFunctionReturn(0);
3379 }
3380 
3381 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3382 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3383 {
3384   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3385   PetscInt       *rowidxs,*colidxs;
3386   PetscScalar    *matvals;
3387   PetscErrorCode ierr;
3388 
3389   PetscFunctionBegin;
3390   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3391 
3392   /* read matrix header */
3393   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3394   PetscCheckFalse(header[0] != MAT_FILE_CLASSID,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3395   M = header[1]; N = header[2]; nz = header[3];
3396   PetscCheckFalse(M < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
3397   PetscCheckFalse(N < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
3398   PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ");
3399 
3400   /* set block sizes from the viewer's .info file */
3401   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3402   /* set local and global sizes if not set already */
3403   if (mat->rmap->n < 0) mat->rmap->n = M;
3404   if (mat->cmap->n < 0) mat->cmap->n = N;
3405   if (mat->rmap->N < 0) mat->rmap->N = M;
3406   if (mat->cmap->N < 0) mat->cmap->N = N;
3407   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3408   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3409 
3410   /* check if the matrix sizes are correct */
3411   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3412   PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
3413   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3414   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3415   mbs = m/bs; nbs = n/bs;
3416 
3417   /* read in row lengths, column indices and nonzero values */
3418   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3419   ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr);
3420   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3421   sum = rowidxs[m];
3422   PetscCheckFalse(sum != nz,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
3423 
3424   /* read in column indices and nonzero values */
3425   ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr);
3426   ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr);
3427   ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr);
3428 
3429   { /* preallocate matrix storage */
3430     PetscBT   bt; /* helper bit set to count nonzeros */
3431     PetscInt  *nnz;
3432     PetscBool sbaij;
3433 
3434     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3435     ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr);
3436     ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr);
3437     for (i=0; i<mbs; i++) {
3438       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3439       for (k=0; k<bs; k++) {
3440         PetscInt row = bs*i + k;
3441         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3442           PetscInt col = colidxs[j];
3443           if (!sbaij || col >= row)
3444             if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3445         }
3446       }
3447     }
3448     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3449     ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3450     ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3451     ierr = PetscFree(nnz);CHKERRQ(ierr);
3452   }
3453 
3454   /* store matrix values */
3455   for (i=0; i<m; i++) {
3456     PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3457     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3458   }
3459 
3460   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3461   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3462   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3463   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3464   PetscFunctionReturn(0);
3465 }
3466 
3467 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3468 {
3469   PetscErrorCode ierr;
3470   PetscBool      isbinary;
3471 
3472   PetscFunctionBegin;
3473   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3474   PetscCheckFalse(!isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3475   ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3476   PetscFunctionReturn(0);
3477 }
3478 
3479 /*@C
3480    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3481    compressed row) format.  For good matrix assembly performance the
3482    user should preallocate the matrix storage by setting the parameter nz
3483    (or the array nnz).  By setting these parameters accurately, performance
3484    during matrix assembly can be increased by more than a factor of 50.
3485 
3486    Collective
3487 
3488    Input Parameters:
3489 +  comm - MPI communicator, set to PETSC_COMM_SELF
3490 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3491           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3492 .  m - number of rows
3493 .  n - number of columns
3494 .  nz - number of nonzero blocks  per block row (same for all rows)
3495 -  nnz - array containing the number of nonzero blocks in the various block rows
3496          (possibly different for each block row) or NULL
3497 
3498    Output Parameter:
3499 .  A - the matrix
3500 
3501    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3502    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3503    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3504 
3505    Options Database Keys:
3506 +   -mat_no_unroll - uses code that does not unroll the loops in the
3507                      block calculations (much slower)
3508 -    -mat_block_size - size of the blocks to use
3509 
3510    Level: intermediate
3511 
3512    Notes:
3513    The number of rows and columns must be divisible by blocksize.
3514 
3515    If the nnz parameter is given then the nz parameter is ignored
3516 
3517    A nonzero block is any block that as 1 or more nonzeros in it
3518 
3519    The block AIJ format is fully compatible with standard Fortran 77
3520    storage.  That is, the stored row and column indices can begin at
3521    either one (as in Fortran) or zero.  See the users' manual for details.
3522 
3523    Specify the preallocated storage with either nz or nnz (not both).
3524    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3525    allocation.  See Users-Manual: ch_mat for details.
3526    matrices.
3527 
3528 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3529 @*/
3530 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3531 {
3532   PetscErrorCode ierr;
3533 
3534   PetscFunctionBegin;
3535   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3536   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3537   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3538   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3539   PetscFunctionReturn(0);
3540 }
3541 
3542 /*@C
3543    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3544    per row in the matrix. For good matrix assembly performance the
3545    user should preallocate the matrix storage by setting the parameter nz
3546    (or the array nnz).  By setting these parameters accurately, performance
3547    during matrix assembly can be increased by more than a factor of 50.
3548 
3549    Collective
3550 
3551    Input Parameters:
3552 +  B - the matrix
3553 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3554           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3555 .  nz - number of block nonzeros per block row (same for all rows)
3556 -  nnz - array containing the number of block nonzeros in the various block rows
3557          (possibly different for each block row) or NULL
3558 
3559    Options Database Keys:
3560 +   -mat_no_unroll - uses code that does not unroll the loops in the
3561                      block calculations (much slower)
3562 -   -mat_block_size - size of the blocks to use
3563 
3564    Level: intermediate
3565 
3566    Notes:
3567    If the nnz parameter is given then the nz parameter is ignored
3568 
3569    You can call MatGetInfo() to get information on how effective the preallocation was;
3570    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3571    You can also run with the option -info and look for messages with the string
3572    malloc in them to see if additional memory allocation was needed.
3573 
3574    The block AIJ format is fully compatible with standard Fortran 77
3575    storage.  That is, the stored row and column indices can begin at
3576    either one (as in Fortran) or zero.  See the users' manual for details.
3577 
3578    Specify the preallocated storage with either nz or nnz (not both).
3579    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3580    allocation.  See Users-Manual: ch_mat for details.
3581 
3582 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3583 @*/
3584 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3585 {
3586   PetscErrorCode ierr;
3587 
3588   PetscFunctionBegin;
3589   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3590   PetscValidType(B,1);
3591   PetscValidLogicalCollectiveInt(B,bs,2);
3592   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3593   PetscFunctionReturn(0);
3594 }
3595 
3596 /*@C
3597    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3598 
3599    Collective
3600 
3601    Input Parameters:
3602 +  B - the matrix
3603 .  i - the indices into j for the start of each local row (starts with zero)
3604 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3605 -  v - optional values in the matrix
3606 
3607    Level: advanced
3608 
3609    Notes:
3610    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3611    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3612    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3613    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3614    block column and the second index is over columns within a block.
3615 
3616    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3617 
3618 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3619 @*/
3620 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3621 {
3622   PetscErrorCode ierr;
3623 
3624   PetscFunctionBegin;
3625   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3626   PetscValidType(B,1);
3627   PetscValidLogicalCollectiveInt(B,bs,2);
3628   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3629   PetscFunctionReturn(0);
3630 }
3631 
3632 /*@
3633      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3634 
3635      Collective
3636 
3637    Input Parameters:
3638 +  comm - must be an MPI communicator of size 1
3639 .  bs - size of block
3640 .  m - number of rows
3641 .  n - number of columns
3642 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3643 .  j - column indices
3644 -  a - matrix values
3645 
3646    Output Parameter:
3647 .  mat - the matrix
3648 
3649    Level: advanced
3650 
3651    Notes:
3652        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3653     once the matrix is destroyed
3654 
3655        You cannot set new nonzero locations into this matrix, that will generate an error.
3656 
3657        The i and j indices are 0 based
3658 
3659        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3660 
3661       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3662       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3663       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3664       with column-major ordering within blocks.
3665 
3666 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3667 
3668 @*/
3669 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3670 {
3671   PetscErrorCode ierr;
3672   PetscInt       ii;
3673   Mat_SeqBAIJ    *baij;
3674 
3675   PetscFunctionBegin;
3676   PetscCheckFalse(bs != 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %" PetscInt_FMT " > 1 is not supported yet",bs);
3677   PetscCheckFalse(m > 0 && i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3678 
3679   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3680   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3681   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3682   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
3683   baij = (Mat_SeqBAIJ*)(*mat)->data;
3684   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3685   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3686 
3687   baij->i = i;
3688   baij->j = j;
3689   baij->a = a;
3690 
3691   baij->singlemalloc = PETSC_FALSE;
3692   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3693   baij->free_a       = PETSC_FALSE;
3694   baij->free_ij      = PETSC_FALSE;
3695 
3696   for (ii=0; ii<m; ii++) {
3697     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3698     PetscCheck(i[ii+1] - i[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT,ii,i[ii+1] - i[ii]);
3699   }
3700   if (PetscDefined(USE_DEBUG)) {
3701     for (ii=0; ii<baij->i[m]; ii++) {
3702       PetscCheck(j[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]);
3703       PetscCheck(j[ii] <= n - 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]);
3704     }
3705   }
3706 
3707   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3708   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3709   PetscFunctionReturn(0);
3710 }
3711 
3712 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3713 {
3714   PetscErrorCode ierr;
3715   PetscMPIInt    size;
3716 
3717   PetscFunctionBegin;
3718   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3719   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3720     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3721   } else {
3722     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3723   }
3724   PetscFunctionReturn(0);
3725 }
3726