xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 7d5fd1e4d9337468ad3f05b65b7facdcd2dfd2a4)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 
20 PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A,PetscInt type,PetscReal *reductions)
21 {
22   PetscErrorCode ierr;
23   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) A->data;
24   PetscInt       m,n,i;
25   PetscInt       ib,jb,bs = A->rmap->bs;
26   MatScalar      *a_val = a_aij->a;
27 
28   PetscFunctionBegin;
29   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
30   for (i=0; i<n; i++) reductions[i] = 0.0;
31   if (type == NORM_2) {
32     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
33       for (jb=0; jb<bs; jb++) {
34         for (ib=0; ib<bs; ib++) {
35           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
36           a_val++;
37         }
38       }
39     }
40   } else if (type == NORM_1) {
41     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
42       for (jb=0; jb<bs; jb++) {
43         for (ib=0; ib<bs; ib++) {
44           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
45           a_val++;
46         }
47       }
48     }
49   } else if (type == NORM_INFINITY) {
50     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
51       for (jb=0; jb<bs; jb++) {
52         for (ib=0; ib<bs; ib++) {
53           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
54           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
55           a_val++;
56         }
57       }
58     }
59   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
60     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
61       for (jb=0; jb<bs; jb++) {
62         for (ib=0; ib<bs; ib++) {
63           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
64           a_val++;
65         }
66       }
67     }
68   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
69     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
70       for (jb=0; jb<bs; jb++) {
71         for (ib=0; ib<bs; ib++) {
72           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
73           a_val++;
74         }
75       }
76     }
77   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
78   if (type == NORM_2) {
79     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
80   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
81     for (i=0; i<n; i++) reductions[i] /= m;
82   }
83   PetscFunctionReturn(0);
84 }
85 
86 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
87 {
88   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
89   PetscErrorCode ierr;
90   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
91   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
92   PetscReal      shift = 0.0;
93   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
94 
95   PetscFunctionBegin;
96   allowzeropivot = PetscNot(A->erroriffailure);
97 
98   if (a->idiagvalid) {
99     if (values) *values = a->idiag;
100     PetscFunctionReturn(0);
101   }
102   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
103   diag_offset = a->diag;
104   if (!a->idiag) {
105     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
106     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
107   }
108   diag  = a->idiag;
109   if (values) *values = a->idiag;
110   /* factor and invert each block */
111   switch (bs) {
112   case 1:
113     for (i=0; i<mbs; i++) {
114       odiag    = v + 1*diag_offset[i];
115       diag[0]  = odiag[0];
116 
117       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
118         if (allowzeropivot) {
119           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
120           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
121           A->factorerror_zeropivot_row   = i;
122           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
123         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
124       }
125 
126       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
127       diag    += 1;
128     }
129     break;
130   case 2:
131     for (i=0; i<mbs; i++) {
132       odiag    = v + 4*diag_offset[i];
133       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
134       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
135       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
136       diag    += 4;
137     }
138     break;
139   case 3:
140     for (i=0; i<mbs; i++) {
141       odiag    = v + 9*diag_offset[i];
142       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
143       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
144       diag[8]  = odiag[8];
145       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
146       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
147       diag    += 9;
148     }
149     break;
150   case 4:
151     for (i=0; i<mbs; i++) {
152       odiag  = v + 16*diag_offset[i];
153       ierr   = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr);
154       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
155       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
156       diag  += 16;
157     }
158     break;
159   case 5:
160     for (i=0; i<mbs; i++) {
161       odiag  = v + 25*diag_offset[i];
162       ierr   = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr);
163       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
164       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
165       diag  += 25;
166     }
167     break;
168   case 6:
169     for (i=0; i<mbs; i++) {
170       odiag  = v + 36*diag_offset[i];
171       ierr   = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr);
172       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
173       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
174       diag  += 36;
175     }
176     break;
177   case 7:
178     for (i=0; i<mbs; i++) {
179       odiag  = v + 49*diag_offset[i];
180       ierr   = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr);
181       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
182       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
183       diag  += 49;
184     }
185     break;
186   default:
187     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
188     for (i=0; i<mbs; i++) {
189       odiag  = v + bs2*diag_offset[i];
190       ierr   = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr);
191       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
192       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
193       diag  += bs2;
194     }
195     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
196   }
197   a->idiagvalid = PETSC_TRUE;
198   PetscFunctionReturn(0);
199 }
200 
201 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
202 {
203   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
204   PetscScalar       *x,*work,*w,*workt,*t;
205   const MatScalar   *v,*aa = a->a, *idiag;
206   const PetscScalar *b,*xb;
207   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
208   PetscErrorCode    ierr;
209   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
210   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
211 
212   PetscFunctionBegin;
213   its = its*lits;
214   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
215   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
216   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
217   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor");
218   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
219 
220   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
221 
222   if (!m) PetscFunctionReturn(0);
223   diag  = a->diag;
224   idiag = a->idiag;
225   k    = PetscMax(A->rmap->n,A->cmap->n);
226   if (!a->mult_work) {
227     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
228   }
229   if (!a->sor_workt) {
230     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
231   }
232   if (!a->sor_work) {
233     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
234   }
235   work = a->mult_work;
236   t    = a->sor_workt;
237   w    = a->sor_work;
238 
239   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
240   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
241 
242   if (flag & SOR_ZERO_INITIAL_GUESS) {
243     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
244       switch (bs) {
245       case 1:
246         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
247         t[0] = b[0];
248         i2     = 1;
249         idiag += 1;
250         for (i=1; i<m; i++) {
251           v  = aa + ai[i];
252           vi = aj + ai[i];
253           nz = diag[i] - ai[i];
254           s[0] = b[i2];
255           for (j=0; j<nz; j++) {
256             xw[0] = x[vi[j]];
257             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
258           }
259           t[i2] = s[0];
260           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
261           x[i2]  = xw[0];
262           idiag += 1;
263           i2    += 1;
264         }
265         break;
266       case 2:
267         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
268         t[0] = b[0]; t[1] = b[1];
269         i2     = 2;
270         idiag += 4;
271         for (i=1; i<m; i++) {
272           v  = aa + 4*ai[i];
273           vi = aj + ai[i];
274           nz = diag[i] - ai[i];
275           s[0] = b[i2]; s[1] = b[i2+1];
276           for (j=0; j<nz; j++) {
277             idx = 2*vi[j];
278             it  = 4*j;
279             xw[0] = x[idx]; xw[1] = x[1+idx];
280             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
281           }
282           t[i2] = s[0]; t[i2+1] = s[1];
283           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
284           x[i2]   = xw[0]; x[i2+1] = xw[1];
285           idiag  += 4;
286           i2     += 2;
287         }
288         break;
289       case 3:
290         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
291         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
292         i2     = 3;
293         idiag += 9;
294         for (i=1; i<m; i++) {
295           v  = aa + 9*ai[i];
296           vi = aj + ai[i];
297           nz = diag[i] - ai[i];
298           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
299           while (nz--) {
300             idx = 3*(*vi++);
301             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
302             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
303             v  += 9;
304           }
305           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
306           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
307           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
308           idiag  += 9;
309           i2     += 3;
310         }
311         break;
312       case 4:
313         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
314         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
315         i2     = 4;
316         idiag += 16;
317         for (i=1; i<m; i++) {
318           v  = aa + 16*ai[i];
319           vi = aj + ai[i];
320           nz = diag[i] - ai[i];
321           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
322           while (nz--) {
323             idx = 4*(*vi++);
324             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
325             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
326             v  += 16;
327           }
328           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
329           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
330           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
331           idiag  += 16;
332           i2     += 4;
333         }
334         break;
335       case 5:
336         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
337         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
338         i2     = 5;
339         idiag += 25;
340         for (i=1; i<m; i++) {
341           v  = aa + 25*ai[i];
342           vi = aj + ai[i];
343           nz = diag[i] - ai[i];
344           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
345           while (nz--) {
346             idx = 5*(*vi++);
347             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
348             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
349             v  += 25;
350           }
351           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
352           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
353           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
354           idiag  += 25;
355           i2     += 5;
356         }
357         break;
358       case 6:
359         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
360         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
361         i2     = 6;
362         idiag += 36;
363         for (i=1; i<m; i++) {
364           v  = aa + 36*ai[i];
365           vi = aj + ai[i];
366           nz = diag[i] - ai[i];
367           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
368           while (nz--) {
369             idx = 6*(*vi++);
370             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
371             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
372             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
373             v  += 36;
374           }
375           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
376           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
377           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
378           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
379           idiag  += 36;
380           i2     += 6;
381         }
382         break;
383       case 7:
384         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
385         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
386         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
387         i2     = 7;
388         idiag += 49;
389         for (i=1; i<m; i++) {
390           v  = aa + 49*ai[i];
391           vi = aj + ai[i];
392           nz = diag[i] - ai[i];
393           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
394           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
395           while (nz--) {
396             idx = 7*(*vi++);
397             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
398             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
399             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
400             v  += 49;
401           }
402           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
403           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
404           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
405           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
406           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
407           idiag  += 49;
408           i2     += 7;
409         }
410         break;
411       default:
412         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
413         ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr);
414         i2     = bs;
415         idiag += bs2;
416         for (i=1; i<m; i++) {
417           v  = aa + bs2*ai[i];
418           vi = aj + ai[i];
419           nz = diag[i] - ai[i];
420 
421           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
422           /* copy all rows of x that are needed into contiguous space */
423           workt = work;
424           for (j=0; j<nz; j++) {
425             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
426             workt += bs;
427           }
428           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
429           ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr);
430           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
431 
432           idiag += bs2;
433           i2    += bs;
434         }
435         break;
436       }
437       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
438       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
439       xb = t;
440     }
441     else xb = b;
442     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
443       idiag = a->idiag+bs2*(a->mbs-1);
444       i2 = bs * (m-1);
445       switch (bs) {
446       case 1:
447         s[0]  = xb[i2];
448         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
449         x[i2] = xw[0];
450         i2   -= 1;
451         for (i=m-2; i>=0; i--) {
452           v  = aa + (diag[i]+1);
453           vi = aj + diag[i] + 1;
454           nz = ai[i+1] - diag[i] - 1;
455           s[0] = xb[i2];
456           for (j=0; j<nz; j++) {
457             xw[0] = x[vi[j]];
458             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
459           }
460           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
461           x[i2]  = xw[0];
462           idiag -= 1;
463           i2    -= 1;
464         }
465         break;
466       case 2:
467         s[0]  = xb[i2]; s[1] = xb[i2+1];
468         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
469         x[i2] = xw[0]; x[i2+1] = xw[1];
470         i2    -= 2;
471         idiag -= 4;
472         for (i=m-2; i>=0; i--) {
473           v  = aa + 4*(diag[i] + 1);
474           vi = aj + diag[i] + 1;
475           nz = ai[i+1] - diag[i] - 1;
476           s[0] = xb[i2]; s[1] = xb[i2+1];
477           for (j=0; j<nz; j++) {
478             idx = 2*vi[j];
479             it  = 4*j;
480             xw[0] = x[idx]; xw[1] = x[1+idx];
481             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
482           }
483           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
484           x[i2]   = xw[0]; x[i2+1] = xw[1];
485           idiag  -= 4;
486           i2     -= 2;
487         }
488         break;
489       case 3:
490         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
491         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
492         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
493         i2    -= 3;
494         idiag -= 9;
495         for (i=m-2; i>=0; i--) {
496           v  = aa + 9*(diag[i]+1);
497           vi = aj + diag[i] + 1;
498           nz = ai[i+1] - diag[i] - 1;
499           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
500           while (nz--) {
501             idx = 3*(*vi++);
502             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
503             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
504             v  += 9;
505           }
506           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
507           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
508           idiag  -= 9;
509           i2     -= 3;
510         }
511         break;
512       case 4:
513         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
514         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
515         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
516         i2    -= 4;
517         idiag -= 16;
518         for (i=m-2; i>=0; i--) {
519           v  = aa + 16*(diag[i]+1);
520           vi = aj + diag[i] + 1;
521           nz = ai[i+1] - diag[i] - 1;
522           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
523           while (nz--) {
524             idx = 4*(*vi++);
525             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
526             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
527             v  += 16;
528           }
529           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
530           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
531           idiag  -= 16;
532           i2     -= 4;
533         }
534         break;
535       case 5:
536         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
537         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
538         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
539         i2    -= 5;
540         idiag -= 25;
541         for (i=m-2; i>=0; i--) {
542           v  = aa + 25*(diag[i]+1);
543           vi = aj + diag[i] + 1;
544           nz = ai[i+1] - diag[i] - 1;
545           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
546           while (nz--) {
547             idx = 5*(*vi++);
548             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
549             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
550             v  += 25;
551           }
552           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
553           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
554           idiag  -= 25;
555           i2     -= 5;
556         }
557         break;
558       case 6:
559         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
560         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
561         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
562         i2    -= 6;
563         idiag -= 36;
564         for (i=m-2; i>=0; i--) {
565           v  = aa + 36*(diag[i]+1);
566           vi = aj + diag[i] + 1;
567           nz = ai[i+1] - diag[i] - 1;
568           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
569           while (nz--) {
570             idx = 6*(*vi++);
571             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
572             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
573             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
574             v  += 36;
575           }
576           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
577           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
578           idiag  -= 36;
579           i2     -= 6;
580         }
581         break;
582       case 7:
583         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
584         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
585         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
586         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
587         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
588         i2    -= 7;
589         idiag -= 49;
590         for (i=m-2; i>=0; i--) {
591           v  = aa + 49*(diag[i]+1);
592           vi = aj + diag[i] + 1;
593           nz = ai[i+1] - diag[i] - 1;
594           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
595           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
596           while (nz--) {
597             idx = 7*(*vi++);
598             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
599             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
600             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
601             v  += 49;
602           }
603           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
604           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
605           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
606           idiag  -= 49;
607           i2     -= 7;
608         }
609         break;
610       default:
611         ierr  = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
612         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
613         i2    -= bs;
614         idiag -= bs2;
615         for (i=m-2; i>=0; i--) {
616           v  = aa + bs2*(diag[i]+1);
617           vi = aj + diag[i] + 1;
618           nz = ai[i+1] - diag[i] - 1;
619 
620           ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
621           /* copy all rows of x that are needed into contiguous space */
622           workt = work;
623           for (j=0; j<nz; j++) {
624             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
625             workt += bs;
626           }
627           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
628           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
629 
630           idiag -= bs2;
631           i2    -= bs;
632         }
633         break;
634       }
635       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
636     }
637     its--;
638   }
639   while (its--) {
640     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
641       idiag = a->idiag;
642       i2 = 0;
643       switch (bs) {
644       case 1:
645         for (i=0; i<m; i++) {
646           v  = aa + ai[i];
647           vi = aj + ai[i];
648           nz = ai[i+1] - ai[i];
649           s[0] = b[i2];
650           for (j=0; j<nz; j++) {
651             xw[0] = x[vi[j]];
652             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
653           }
654           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
655           x[i2] += xw[0];
656           idiag += 1;
657           i2    += 1;
658         }
659         break;
660       case 2:
661         for (i=0; i<m; i++) {
662           v  = aa + 4*ai[i];
663           vi = aj + ai[i];
664           nz = ai[i+1] - ai[i];
665           s[0] = b[i2]; s[1] = b[i2+1];
666           for (j=0; j<nz; j++) {
667             idx = 2*vi[j];
668             it  = 4*j;
669             xw[0] = x[idx]; xw[1] = x[1+idx];
670             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
671           }
672           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
673           x[i2]  += xw[0]; x[i2+1] += xw[1];
674           idiag  += 4;
675           i2     += 2;
676         }
677         break;
678       case 3:
679         for (i=0; i<m; i++) {
680           v  = aa + 9*ai[i];
681           vi = aj + ai[i];
682           nz = ai[i+1] - ai[i];
683           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
684           while (nz--) {
685             idx = 3*(*vi++);
686             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
687             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
688             v  += 9;
689           }
690           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
691           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
692           idiag  += 9;
693           i2     += 3;
694         }
695         break;
696       case 4:
697         for (i=0; i<m; i++) {
698           v  = aa + 16*ai[i];
699           vi = aj + ai[i];
700           nz = ai[i+1] - ai[i];
701           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
702           while (nz--) {
703             idx = 4*(*vi++);
704             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
705             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
706             v  += 16;
707           }
708           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
709           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
710           idiag  += 16;
711           i2     += 4;
712         }
713         break;
714       case 5:
715         for (i=0; i<m; i++) {
716           v  = aa + 25*ai[i];
717           vi = aj + ai[i];
718           nz = ai[i+1] - ai[i];
719           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
720           while (nz--) {
721             idx = 5*(*vi++);
722             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
723             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
724             v  += 25;
725           }
726           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
727           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
728           idiag  += 25;
729           i2     += 5;
730         }
731         break;
732       case 6:
733         for (i=0; i<m; i++) {
734           v  = aa + 36*ai[i];
735           vi = aj + ai[i];
736           nz = ai[i+1] - ai[i];
737           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
738           while (nz--) {
739             idx = 6*(*vi++);
740             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
741             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
742             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
743             v  += 36;
744           }
745           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
746           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
747           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
748           idiag  += 36;
749           i2     += 6;
750         }
751         break;
752       case 7:
753         for (i=0; i<m; i++) {
754           v  = aa + 49*ai[i];
755           vi = aj + ai[i];
756           nz = ai[i+1] - ai[i];
757           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
758           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
759           while (nz--) {
760             idx = 7*(*vi++);
761             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
762             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
763             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
764             v  += 49;
765           }
766           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
767           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
768           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
769           idiag  += 49;
770           i2     += 7;
771         }
772         break;
773       default:
774         for (i=0; i<m; i++) {
775           v  = aa + bs2*ai[i];
776           vi = aj + ai[i];
777           nz = ai[i+1] - ai[i];
778 
779           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
780           /* copy all rows of x that are needed into contiguous space */
781           workt = work;
782           for (j=0; j<nz; j++) {
783             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
784             workt += bs;
785           }
786           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
787           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
788 
789           idiag += bs2;
790           i2    += bs;
791         }
792         break;
793       }
794       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
795     }
796     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
797       idiag = a->idiag+bs2*(a->mbs-1);
798       i2 = bs * (m-1);
799       switch (bs) {
800       case 1:
801         for (i=m-1; i>=0; i--) {
802           v  = aa + ai[i];
803           vi = aj + ai[i];
804           nz = ai[i+1] - ai[i];
805           s[0] = b[i2];
806           for (j=0; j<nz; j++) {
807             xw[0] = x[vi[j]];
808             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
809           }
810           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
811           x[i2] += xw[0];
812           idiag -= 1;
813           i2    -= 1;
814         }
815         break;
816       case 2:
817         for (i=m-1; i>=0; i--) {
818           v  = aa + 4*ai[i];
819           vi = aj + ai[i];
820           nz = ai[i+1] - ai[i];
821           s[0] = b[i2]; s[1] = b[i2+1];
822           for (j=0; j<nz; j++) {
823             idx = 2*vi[j];
824             it  = 4*j;
825             xw[0] = x[idx]; xw[1] = x[1+idx];
826             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
827           }
828           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
829           x[i2]  += xw[0]; x[i2+1] += xw[1];
830           idiag  -= 4;
831           i2     -= 2;
832         }
833         break;
834       case 3:
835         for (i=m-1; i>=0; i--) {
836           v  = aa + 9*ai[i];
837           vi = aj + ai[i];
838           nz = ai[i+1] - ai[i];
839           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
840           while (nz--) {
841             idx = 3*(*vi++);
842             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
843             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
844             v  += 9;
845           }
846           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
847           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
848           idiag  -= 9;
849           i2     -= 3;
850         }
851         break;
852       case 4:
853         for (i=m-1; i>=0; i--) {
854           v  = aa + 16*ai[i];
855           vi = aj + ai[i];
856           nz = ai[i+1] - ai[i];
857           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
858           while (nz--) {
859             idx = 4*(*vi++);
860             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
861             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
862             v  += 16;
863           }
864           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
865           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
866           idiag  -= 16;
867           i2     -= 4;
868         }
869         break;
870       case 5:
871         for (i=m-1; i>=0; i--) {
872           v  = aa + 25*ai[i];
873           vi = aj + ai[i];
874           nz = ai[i+1] - ai[i];
875           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
876           while (nz--) {
877             idx = 5*(*vi++);
878             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
879             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
880             v  += 25;
881           }
882           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
883           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
884           idiag  -= 25;
885           i2     -= 5;
886         }
887         break;
888       case 6:
889         for (i=m-1; i>=0; i--) {
890           v  = aa + 36*ai[i];
891           vi = aj + ai[i];
892           nz = ai[i+1] - ai[i];
893           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
894           while (nz--) {
895             idx = 6*(*vi++);
896             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
897             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
898             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
899             v  += 36;
900           }
901           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
902           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
903           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
904           idiag  -= 36;
905           i2     -= 6;
906         }
907         break;
908       case 7:
909         for (i=m-1; i>=0; i--) {
910           v  = aa + 49*ai[i];
911           vi = aj + ai[i];
912           nz = ai[i+1] - ai[i];
913           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
914           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
915           while (nz--) {
916             idx = 7*(*vi++);
917             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
918             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
919             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
920             v  += 49;
921           }
922           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
923           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
924           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
925           idiag  -= 49;
926           i2     -= 7;
927         }
928         break;
929       default:
930         for (i=m-1; i>=0; i--) {
931           v  = aa + bs2*ai[i];
932           vi = aj + ai[i];
933           nz = ai[i+1] - ai[i];
934 
935           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
936           /* copy all rows of x that are needed into contiguous space */
937           workt = work;
938           for (j=0; j<nz; j++) {
939             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
940             workt += bs;
941           }
942           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
943           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
944 
945           idiag -= bs2;
946           i2    -= bs;
947         }
948         break;
949       }
950       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
951     }
952   }
953   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
954   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
955   PetscFunctionReturn(0);
956 }
957 
958 /*
959     Special version for direct calls from Fortran (Used in PETSc-fun3d)
960 */
961 #if defined(PETSC_HAVE_FORTRAN_CAPS)
962 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
963 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
964 #define matsetvaluesblocked4_ matsetvaluesblocked4
965 #endif
966 
967 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
968 {
969   Mat               A  = *AA;
970   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
971   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
972   PetscInt          *ai    =a->i,*ailen=a->ilen;
973   PetscInt          *aj    =a->j,stepval,lastcol = -1;
974   const PetscScalar *value = v;
975   MatScalar         *ap,*aa = a->a,*bap;
976   PetscErrorCode    ierr;
977 
978   PetscFunctionBegin;
979   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
980   stepval = (n-1)*4;
981   for (k=0; k<m; k++) { /* loop over added rows */
982     row  = im[k];
983     rp   = aj + ai[row];
984     ap   = aa + 16*ai[row];
985     nrow = ailen[row];
986     low  = 0;
987     high = nrow;
988     for (l=0; l<n; l++) { /* loop over added columns */
989       col = in[l];
990       if (col <= lastcol)  low = 0;
991       else                high = nrow;
992       lastcol = col;
993       value   = v + k*(stepval+4 + l)*4;
994       while (high-low > 7) {
995         t = (low+high)/2;
996         if (rp[t] > col) high = t;
997         else             low  = t;
998       }
999       for (i=low; i<high; i++) {
1000         if (rp[i] > col) break;
1001         if (rp[i] == col) {
1002           bap = ap +  16*i;
1003           for (ii=0; ii<4; ii++,value+=stepval) {
1004             for (jj=ii; jj<16; jj+=4) {
1005               bap[jj] += *value++;
1006             }
1007           }
1008           goto noinsert2;
1009         }
1010       }
1011       N = nrow++ - 1;
1012       high++; /* added new column index thus must search to one higher than before */
1013       /* shift up all the later entries in this row */
1014       for (ii=N; ii>=i; ii--) {
1015         rp[ii+1] = rp[ii];
1016         ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
1017       }
1018       if (N >= i) {
1019         ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1020       }
1021       rp[i] = col;
1022       bap   = ap +  16*i;
1023       for (ii=0; ii<4; ii++,value+=stepval) {
1024         for (jj=ii; jj<16; jj+=4) {
1025           bap[jj] = *value++;
1026         }
1027       }
1028       noinsert2:;
1029       low = i;
1030     }
1031     ailen[row] = nrow;
1032   }
1033   PetscFunctionReturnVoid();
1034 }
1035 
1036 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1037 #define matsetvalues4_ MATSETVALUES4
1038 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1039 #define matsetvalues4_ matsetvalues4
1040 #endif
1041 
1042 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1043 {
1044   Mat         A  = *AA;
1045   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1046   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
1047   PetscInt    *ai=a->i,*ailen=a->ilen;
1048   PetscInt    *aj=a->j,brow,bcol;
1049   PetscInt    ridx,cidx,lastcol = -1;
1050   MatScalar   *ap,value,*aa=a->a,*bap;
1051   PetscErrorCode ierr;
1052 
1053   PetscFunctionBegin;
1054   for (k=0; k<m; k++) { /* loop over added rows */
1055     row  = im[k]; brow = row/4;
1056     rp   = aj + ai[brow];
1057     ap   = aa + 16*ai[brow];
1058     nrow = ailen[brow];
1059     low  = 0;
1060     high = nrow;
1061     for (l=0; l<n; l++) { /* loop over added columns */
1062       col   = in[l]; bcol = col/4;
1063       ridx  = row % 4; cidx = col % 4;
1064       value = v[l + k*n];
1065       if (col <= lastcol)  low = 0;
1066       else                high = nrow;
1067       lastcol = col;
1068       while (high-low > 7) {
1069         t = (low+high)/2;
1070         if (rp[t] > bcol) high = t;
1071         else              low  = t;
1072       }
1073       for (i=low; i<high; i++) {
1074         if (rp[i] > bcol) break;
1075         if (rp[i] == bcol) {
1076           bap   = ap +  16*i + 4*cidx + ridx;
1077           *bap += value;
1078           goto noinsert1;
1079         }
1080       }
1081       N = nrow++ - 1;
1082       high++; /* added new column thus must search to one higher than before */
1083       /* shift up all the later entries in this row */
1084       ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
1085       ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
1086       ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1087       rp[i]                    = bcol;
1088       ap[16*i + 4*cidx + ridx] = value;
1089 noinsert1:;
1090       low = i;
1091     }
1092     ailen[brow] = nrow;
1093   }
1094   PetscFunctionReturnVoid();
1095 }
1096 
1097 /*
1098      Checks for missing diagonals
1099 */
1100 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1101 {
1102   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1103   PetscErrorCode ierr;
1104   PetscInt       *diag,*ii = a->i,i;
1105 
1106   PetscFunctionBegin;
1107   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1108   *missing = PETSC_FALSE;
1109   if (A->rmap->n > 0 && !ii) {
1110     *missing = PETSC_TRUE;
1111     if (d) *d = 0;
1112     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
1113   } else {
1114     PetscInt n;
1115     n = PetscMin(a->mbs, a->nbs);
1116     diag = a->diag;
1117     for (i=0; i<n; i++) {
1118       if (diag[i] >= ii[i+1]) {
1119         *missing = PETSC_TRUE;
1120         if (d) *d = i;
1121         ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1122         break;
1123       }
1124     }
1125   }
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1130 {
1131   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1132   PetscErrorCode ierr;
1133   PetscInt       i,j,m = a->mbs;
1134 
1135   PetscFunctionBegin;
1136   if (!a->diag) {
1137     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1138     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1139     a->free_diag = PETSC_TRUE;
1140   }
1141   for (i=0; i<m; i++) {
1142     a->diag[i] = a->i[i+1];
1143     for (j=a->i[i]; j<a->i[i+1]; j++) {
1144       if (a->j[j] == i) {
1145         a->diag[i] = j;
1146         break;
1147       }
1148     }
1149   }
1150   PetscFunctionReturn(0);
1151 }
1152 
1153 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1154 {
1155   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1156   PetscErrorCode ierr;
1157   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1158   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1159 
1160   PetscFunctionBegin;
1161   *nn = n;
1162   if (!ia) PetscFunctionReturn(0);
1163   if (symmetric) {
1164     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1165     nz   = tia[n];
1166   } else {
1167     tia = a->i; tja = a->j;
1168   }
1169 
1170   if (!blockcompressed && bs > 1) {
1171     (*nn) *= bs;
1172     /* malloc & create the natural set of indices */
1173     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1174     if (n) {
1175       (*ia)[0] = oshift;
1176       for (j=1; j<bs; j++) {
1177         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1178       }
1179     }
1180 
1181     for (i=1; i<n; i++) {
1182       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1183       for (j=1; j<bs; j++) {
1184         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1185       }
1186     }
1187     if (n) {
1188       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1189     }
1190 
1191     if (inja) {
1192       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1193       cnt = 0;
1194       for (i=0; i<n; i++) {
1195         for (j=0; j<bs; j++) {
1196           for (k=tia[i]; k<tia[i+1]; k++) {
1197             for (l=0; l<bs; l++) {
1198               (*ja)[cnt++] = bs*tja[k] + l;
1199             }
1200           }
1201         }
1202       }
1203     }
1204 
1205     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1206       ierr = PetscFree(tia);CHKERRQ(ierr);
1207       ierr = PetscFree(tja);CHKERRQ(ierr);
1208     }
1209   } else if (oshift == 1) {
1210     if (symmetric) {
1211       nz = tia[A->rmap->n/bs];
1212       /*  add 1 to i and j indices */
1213       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1214       *ia = tia;
1215       if (ja) {
1216         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1217         *ja = tja;
1218       }
1219     } else {
1220       nz = a->i[A->rmap->n/bs];
1221       /* malloc space and  add 1 to i and j indices */
1222       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1223       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1224       if (ja) {
1225         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1226         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1227       }
1228     }
1229   } else {
1230     *ia = tia;
1231     if (ja) *ja = tja;
1232   }
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1237 {
1238   PetscErrorCode ierr;
1239 
1240   PetscFunctionBegin;
1241   if (!ia) PetscFunctionReturn(0);
1242   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1243     ierr = PetscFree(*ia);CHKERRQ(ierr);
1244     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1245   }
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1250 {
1251   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1252   PetscErrorCode ierr;
1253 
1254   PetscFunctionBegin;
1255 #if defined(PETSC_USE_LOG)
1256   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1257 #endif
1258   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1259   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1260   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1261   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1262   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1263   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1264   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1265   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1266   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1267   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1268   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1269   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1270   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1271 
1272   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1273   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1274   ierr = PetscFree(A->data);CHKERRQ(ierr);
1275 
1276   ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1288 #if defined(PETSC_HAVE_HYPRE)
1289   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
1290 #endif
1291   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1296 {
1297   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1298   PetscErrorCode ierr;
1299 
1300   PetscFunctionBegin;
1301   switch (op) {
1302   case MAT_ROW_ORIENTED:
1303     a->roworiented = flg;
1304     break;
1305   case MAT_KEEP_NONZERO_PATTERN:
1306     a->keepnonzeropattern = flg;
1307     break;
1308   case MAT_NEW_NONZERO_LOCATIONS:
1309     a->nonew = (flg ? 0 : 1);
1310     break;
1311   case MAT_NEW_NONZERO_LOCATION_ERR:
1312     a->nonew = (flg ? -1 : 0);
1313     break;
1314   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1315     a->nonew = (flg ? -2 : 0);
1316     break;
1317   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1318     a->nounused = (flg ? -1 : 0);
1319     break;
1320   case MAT_FORCE_DIAGONAL_ENTRIES:
1321   case MAT_IGNORE_OFF_PROC_ENTRIES:
1322   case MAT_USE_HASH_TABLE:
1323   case MAT_SORTED_FULL:
1324     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1325     break;
1326   case MAT_SPD:
1327   case MAT_SYMMETRIC:
1328   case MAT_STRUCTURALLY_SYMMETRIC:
1329   case MAT_HERMITIAN:
1330   case MAT_SYMMETRY_ETERNAL:
1331   case MAT_SUBMAT_SINGLEIS:
1332   case MAT_STRUCTURE_ONLY:
1333     /* These options are handled directly by MatSetOption() */
1334     break;
1335   default:
1336     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1337   }
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1342 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1343 {
1344   PetscErrorCode ierr;
1345   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1346   MatScalar      *aa_i;
1347   PetscScalar    *v_i;
1348 
1349   PetscFunctionBegin;
1350   bs  = A->rmap->bs;
1351   bs2 = bs*bs;
1352   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1353 
1354   bn  = row/bs;   /* Block number */
1355   bp  = row % bs; /* Block Position */
1356   M   = ai[bn+1] - ai[bn];
1357   *nz = bs*M;
1358 
1359   if (v) {
1360     *v = NULL;
1361     if (*nz) {
1362       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1363       for (i=0; i<M; i++) { /* for each block in the block row */
1364         v_i  = *v + i*bs;
1365         aa_i = aa + bs2*(ai[bn] + i);
1366         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1367       }
1368     }
1369   }
1370 
1371   if (idx) {
1372     *idx = NULL;
1373     if (*nz) {
1374       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1375       for (i=0; i<M; i++) { /* for each block in the block row */
1376         idx_i = *idx + i*bs;
1377         itmp  = bs*aj[ai[bn] + i];
1378         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1379       }
1380     }
1381   }
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1386 {
1387   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1388   PetscErrorCode ierr;
1389 
1390   PetscFunctionBegin;
1391   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1392   PetscFunctionReturn(0);
1393 }
1394 
1395 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1396 {
1397   PetscErrorCode ierr;
1398 
1399   PetscFunctionBegin;
1400   if (nz)  *nz = 0;
1401   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1402   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1407 {
1408   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1409   Mat            C;
1410   PetscErrorCode ierr;
1411   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1412   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1413   MatScalar      *ata,*aa=a->a;
1414 
1415   PetscFunctionBegin;
1416   ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr);
1417   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1418     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1419 
1420     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1421     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1422     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1423     ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr);
1424 
1425     at  = (Mat_SeqBAIJ*)C->data;
1426     ati = at->i;
1427     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1428   } else {
1429     C = *B;
1430     at = (Mat_SeqBAIJ*)C->data;
1431     ati = at->i;
1432   }
1433 
1434   atj = at->j;
1435   ata = at->a;
1436 
1437   /* Copy ati into atfill so we have locations of the next free space in atj */
1438   ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr);
1439 
1440   /* Walk through A row-wise and mark nonzero entries of A^T. */
1441   for (i=0; i<mbs; i++) {
1442     anzj = ai[i+1] - ai[i];
1443     for (j=0; j<anzj; j++) {
1444       atj[atfill[*aj]] = i;
1445       for (kr=0; kr<bs; kr++) {
1446         for (k=0; k<bs; k++) {
1447           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1448         }
1449       }
1450       atfill[*aj++] += 1;
1451     }
1452   }
1453   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1454   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1455 
1456   /* Clean up temporary space and complete requests. */
1457   ierr = PetscFree(atfill);CHKERRQ(ierr);
1458 
1459   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1460     ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1461     *B = C;
1462   } else {
1463     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1464   }
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1469 {
1470   PetscErrorCode ierr;
1471   Mat            Btrans;
1472 
1473   PetscFunctionBegin;
1474   *f   = PETSC_FALSE;
1475   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1476   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1477   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1482 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
1483 {
1484   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)mat->data;
1485   PetscInt       header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1486   PetscInt       *rowlens,*colidxs;
1487   PetscScalar    *matvals;
1488   PetscErrorCode ierr;
1489 
1490   PetscFunctionBegin;
1491   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1492 
1493   M  = mat->rmap->N;
1494   N  = mat->cmap->N;
1495   m  = mat->rmap->n;
1496   bs = mat->rmap->bs;
1497   nz = bs*bs*A->nz;
1498 
1499   /* write matrix header */
1500   header[0] = MAT_FILE_CLASSID;
1501   header[1] = M; header[2] = N; header[3] = nz;
1502   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1503 
1504   /* store row lengths */
1505   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1506   for (cnt=0, i=0; i<A->mbs; i++)
1507     for (j=0; j<bs; j++)
1508       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1509   ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr);
1510   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1511 
1512   /* store column indices  */
1513   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1514   for (cnt=0, i=0; i<A->mbs; i++)
1515     for (k=0; k<bs; k++)
1516       for (j=A->i[i]; j<A->i[i+1]; j++)
1517         for (l=0; l<bs; l++)
1518           colidxs[cnt++] = bs*A->j[j] + l;
1519   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1520   ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr);
1521   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1522 
1523   /* store nonzero values */
1524   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1525   for (cnt=0, i=0; i<A->mbs; i++)
1526     for (k=0; k<bs; k++)
1527       for (j=A->i[i]; j<A->i[i+1]; j++)
1528         for (l=0; l<bs; l++)
1529           matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1530   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1531   ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1532   ierr = PetscFree(matvals);CHKERRQ(ierr);
1533 
1534   /* write block size option to the viewer's .info file */
1535   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1540 {
1541   PetscErrorCode ierr;
1542   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1543   PetscInt       i,bs = A->rmap->bs,k;
1544 
1545   PetscFunctionBegin;
1546   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1547   for (i=0; i<a->mbs; i++) {
1548     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1549     for (k=a->i[i]; k<a->i[i+1]; k++) {
1550       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1551     }
1552     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1553   }
1554   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1559 {
1560   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1561   PetscErrorCode    ierr;
1562   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1563   PetscViewerFormat format;
1564 
1565   PetscFunctionBegin;
1566   if (A->structure_only) {
1567     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1568     PetscFunctionReturn(0);
1569   }
1570 
1571   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1572   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1573     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1574   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1575     const char *matname;
1576     Mat        aij;
1577     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1578     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1579     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1580     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1581     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1582   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1583       PetscFunctionReturn(0);
1584   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1585     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1586     for (i=0; i<a->mbs; i++) {
1587       for (j=0; j<bs; j++) {
1588         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1589         for (k=a->i[i]; k<a->i[i+1]; k++) {
1590           for (l=0; l<bs; l++) {
1591 #if defined(PETSC_USE_COMPLEX)
1592             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1593               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1594                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1595             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1596               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1597                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1598             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1599               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1600             }
1601 #else
1602             if (a->a[bs2*k + l*bs + j] != 0.0) {
1603               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1604             }
1605 #endif
1606           }
1607         }
1608         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1609       }
1610     }
1611     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1612   } else {
1613     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1614     for (i=0; i<a->mbs; i++) {
1615       for (j=0; j<bs; j++) {
1616         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1617         for (k=a->i[i]; k<a->i[i+1]; k++) {
1618           for (l=0; l<bs; l++) {
1619 #if defined(PETSC_USE_COMPLEX)
1620             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1621               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1622                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1623             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1624               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1625                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1626             } else {
1627               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1628             }
1629 #else
1630             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1631 #endif
1632           }
1633         }
1634         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1635       }
1636     }
1637     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1638   }
1639   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1640   PetscFunctionReturn(0);
1641 }
1642 
1643 #include <petscdraw.h>
1644 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1645 {
1646   Mat               A = (Mat) Aa;
1647   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1648   PetscErrorCode    ierr;
1649   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1650   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1651   MatScalar         *aa;
1652   PetscViewer       viewer;
1653   PetscViewerFormat format;
1654 
1655   PetscFunctionBegin;
1656   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1657   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1658   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1659 
1660   /* loop over matrix elements drawing boxes */
1661 
1662   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1663     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1664     /* Blue for negative, Cyan for zero and  Red for positive */
1665     color = PETSC_DRAW_BLUE;
1666     for (i=0,row=0; i<mbs; i++,row+=bs) {
1667       for (j=a->i[i]; j<a->i[i+1]; j++) {
1668         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1669         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1670         aa  = a->a + j*bs2;
1671         for (k=0; k<bs; k++) {
1672           for (l=0; l<bs; l++) {
1673             if (PetscRealPart(*aa++) >=  0.) continue;
1674             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1675           }
1676         }
1677       }
1678     }
1679     color = PETSC_DRAW_CYAN;
1680     for (i=0,row=0; i<mbs; i++,row+=bs) {
1681       for (j=a->i[i]; j<a->i[i+1]; j++) {
1682         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1683         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1684         aa  = a->a + j*bs2;
1685         for (k=0; k<bs; k++) {
1686           for (l=0; l<bs; l++) {
1687             if (PetscRealPart(*aa++) != 0.) continue;
1688             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1689           }
1690         }
1691       }
1692     }
1693     color = PETSC_DRAW_RED;
1694     for (i=0,row=0; i<mbs; i++,row+=bs) {
1695       for (j=a->i[i]; j<a->i[i+1]; j++) {
1696         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1697         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1698         aa  = a->a + j*bs2;
1699         for (k=0; k<bs; k++) {
1700           for (l=0; l<bs; l++) {
1701             if (PetscRealPart(*aa++) <= 0.) continue;
1702             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1703           }
1704         }
1705       }
1706     }
1707     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1708   } else {
1709     /* use contour shading to indicate magnitude of values */
1710     /* first determine max of all nonzero values */
1711     PetscReal minv = 0.0, maxv = 0.0;
1712     PetscDraw popup;
1713 
1714     for (i=0; i<a->nz*a->bs2; i++) {
1715       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1716     }
1717     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1718     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1719     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1720 
1721     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1722     for (i=0,row=0; i<mbs; i++,row+=bs) {
1723       for (j=a->i[i]; j<a->i[i+1]; j++) {
1724         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1725         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1726         aa  = a->a + j*bs2;
1727         for (k=0; k<bs; k++) {
1728           for (l=0; l<bs; l++) {
1729             MatScalar v = *aa++;
1730             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1731             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1732           }
1733         }
1734       }
1735     }
1736     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1737   }
1738   PetscFunctionReturn(0);
1739 }
1740 
1741 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1742 {
1743   PetscErrorCode ierr;
1744   PetscReal      xl,yl,xr,yr,w,h;
1745   PetscDraw      draw;
1746   PetscBool      isnull;
1747 
1748   PetscFunctionBegin;
1749   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1750   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1751   if (isnull) PetscFunctionReturn(0);
1752 
1753   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1754   xr  += w;          yr += h;        xl = -w;     yl = -h;
1755   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1756   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1757   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1758   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1759   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1764 {
1765   PetscErrorCode ierr;
1766   PetscBool      iascii,isbinary,isdraw;
1767 
1768   PetscFunctionBegin;
1769   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1770   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1771   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1772   if (iascii) {
1773     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1774   } else if (isbinary) {
1775     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1776   } else if (isdraw) {
1777     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1778   } else {
1779     Mat B;
1780     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1781     ierr = MatView(B,viewer);CHKERRQ(ierr);
1782     ierr = MatDestroy(&B);CHKERRQ(ierr);
1783   }
1784   PetscFunctionReturn(0);
1785 }
1786 
1787 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1788 {
1789   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1790   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1791   PetscInt    *ai = a->i,*ailen = a->ilen;
1792   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1793   MatScalar   *ap,*aa = a->a;
1794 
1795   PetscFunctionBegin;
1796   for (k=0; k<m; k++) { /* loop over rows */
1797     row = im[k]; brow = row/bs;
1798     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1799     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1800     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1801     nrow = ailen[brow];
1802     for (l=0; l<n; l++) { /* loop over columns */
1803       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1804       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1805       col  = in[l];
1806       bcol = col/bs;
1807       cidx = col%bs;
1808       ridx = row%bs;
1809       high = nrow;
1810       low  = 0; /* assume unsorted */
1811       while (high-low > 5) {
1812         t = (low+high)/2;
1813         if (rp[t] > bcol) high = t;
1814         else             low  = t;
1815       }
1816       for (i=low; i<high; i++) {
1817         if (rp[i] > bcol) break;
1818         if (rp[i] == bcol) {
1819           *v++ = ap[bs2*i+bs*cidx+ridx];
1820           goto finished;
1821         }
1822       }
1823       *v++ = 0.0;
1824 finished:;
1825     }
1826   }
1827   PetscFunctionReturn(0);
1828 }
1829 
1830 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1831 {
1832   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1833   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1834   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1835   PetscErrorCode    ierr;
1836   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1837   PetscBool         roworiented=a->roworiented;
1838   const PetscScalar *value     = v;
1839   MatScalar         *ap=NULL,*aa = a->a,*bap;
1840 
1841   PetscFunctionBegin;
1842   if (roworiented) {
1843     stepval = (n-1)*bs;
1844   } else {
1845     stepval = (m-1)*bs;
1846   }
1847   for (k=0; k<m; k++) { /* loop over added rows */
1848     row = im[k];
1849     if (row < 0) continue;
1850     if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1851     rp   = aj + ai[row];
1852     if (!A->structure_only) ap = aa + bs2*ai[row];
1853     rmax = imax[row];
1854     nrow = ailen[row];
1855     low  = 0;
1856     high = nrow;
1857     for (l=0; l<n; l++) { /* loop over added columns */
1858       if (in[l] < 0) continue;
1859       if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1860       col = in[l];
1861       if (!A->structure_only) {
1862         if (roworiented) {
1863           value = v + (k*(stepval+bs) + l)*bs;
1864         } else {
1865           value = v + (l*(stepval+bs) + k)*bs;
1866         }
1867       }
1868       if (col <= lastcol) low = 0;
1869       else high = nrow;
1870       lastcol = col;
1871       while (high-low > 7) {
1872         t = (low+high)/2;
1873         if (rp[t] > col) high = t;
1874         else             low  = t;
1875       }
1876       for (i=low; i<high; i++) {
1877         if (rp[i] > col) break;
1878         if (rp[i] == col) {
1879           if (A->structure_only) goto noinsert2;
1880           bap = ap +  bs2*i;
1881           if (roworiented) {
1882             if (is == ADD_VALUES) {
1883               for (ii=0; ii<bs; ii++,value+=stepval) {
1884                 for (jj=ii; jj<bs2; jj+=bs) {
1885                   bap[jj] += *value++;
1886                 }
1887               }
1888             } else {
1889               for (ii=0; ii<bs; ii++,value+=stepval) {
1890                 for (jj=ii; jj<bs2; jj+=bs) {
1891                   bap[jj] = *value++;
1892                 }
1893               }
1894             }
1895           } else {
1896             if (is == ADD_VALUES) {
1897               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1898                 for (jj=0; jj<bs; jj++) {
1899                   bap[jj] += value[jj];
1900                 }
1901                 bap += bs;
1902               }
1903             } else {
1904               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1905                 for (jj=0; jj<bs; jj++) {
1906                   bap[jj]  = value[jj];
1907                 }
1908                 bap += bs;
1909               }
1910             }
1911           }
1912           goto noinsert2;
1913         }
1914       }
1915       if (nonew == 1) goto noinsert2;
1916       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1917       if (A->structure_only) {
1918         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1919       } else {
1920         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1921       }
1922       N = nrow++ - 1; high++;
1923       /* shift up all the later entries in this row */
1924       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
1925       rp[i] = col;
1926       if (!A->structure_only) {
1927         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
1928         bap   = ap +  bs2*i;
1929         if (roworiented) {
1930           for (ii=0; ii<bs; ii++,value+=stepval) {
1931             for (jj=ii; jj<bs2; jj+=bs) {
1932               bap[jj] = *value++;
1933             }
1934           }
1935         } else {
1936           for (ii=0; ii<bs; ii++,value+=stepval) {
1937             for (jj=0; jj<bs; jj++) {
1938               *bap++ = *value++;
1939             }
1940           }
1941         }
1942       }
1943 noinsert2:;
1944       low = i;
1945     }
1946     ailen[row] = nrow;
1947   }
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1952 {
1953   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1954   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1955   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1956   PetscErrorCode ierr;
1957   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1958   MatScalar      *aa  = a->a,*ap;
1959   PetscReal      ratio=0.6;
1960 
1961   PetscFunctionBegin;
1962   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1963 
1964   if (m) rmax = ailen[0];
1965   for (i=1; i<mbs; i++) {
1966     /* move each row back by the amount of empty slots (fshift) before it*/
1967     fshift += imax[i-1] - ailen[i-1];
1968     rmax    = PetscMax(rmax,ailen[i]);
1969     if (fshift) {
1970       ip = aj + ai[i];
1971       ap = aa + bs2*ai[i];
1972       N  = ailen[i];
1973       ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr);
1974       if (!A->structure_only) {
1975         ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr);
1976       }
1977     }
1978     ai[i] = ai[i-1] + ailen[i-1];
1979   }
1980   if (mbs) {
1981     fshift += imax[mbs-1] - ailen[mbs-1];
1982     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1983   }
1984 
1985   /* reset ilen and imax for each row */
1986   a->nonzerorowcnt = 0;
1987   if (A->structure_only) {
1988     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1989   } else { /* !A->structure_only */
1990     for (i=0; i<mbs; i++) {
1991       ailen[i] = imax[i] = ai[i+1] - ai[i];
1992       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1993     }
1994   }
1995   a->nz = ai[mbs];
1996 
1997   /* diagonals may have moved, so kill the diagonal pointers */
1998   a->idiagvalid = PETSC_FALSE;
1999   if (fshift && a->diag) {
2000     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
2001     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2002     a->diag = NULL;
2003   }
2004   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2005   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2006   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2007   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2008 
2009   A->info.mallocs    += a->reallocs;
2010   a->reallocs         = 0;
2011   A->info.nz_unneeded = (PetscReal)fshift*bs2;
2012   a->rmax             = rmax;
2013 
2014   if (!A->structure_only) {
2015     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2016   }
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 /*
2021    This function returns an array of flags which indicate the locations of contiguous
2022    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2023    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2024    Assume: sizes should be long enough to hold all the values.
2025 */
2026 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2027 {
2028   PetscInt  i,j,k,row;
2029   PetscBool flg;
2030 
2031   PetscFunctionBegin;
2032   for (i=0,j=0; i<n; j++) {
2033     row = idx[i];
2034     if (row%bs!=0) { /* Not the beginning of a block */
2035       sizes[j] = 1;
2036       i++;
2037     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2038       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2039       i++;
2040     } else { /* Begining of the block, so check if the complete block exists */
2041       flg = PETSC_TRUE;
2042       for (k=1; k<bs; k++) {
2043         if (row+k != idx[i+k]) { /* break in the block */
2044           flg = PETSC_FALSE;
2045           break;
2046         }
2047       }
2048       if (flg) { /* No break in the bs */
2049         sizes[j] = bs;
2050         i       += bs;
2051       } else {
2052         sizes[j] = 1;
2053         i++;
2054       }
2055     }
2056   }
2057   *bs_max = j;
2058   PetscFunctionReturn(0);
2059 }
2060 
2061 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2062 {
2063   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2064   PetscErrorCode    ierr;
2065   PetscInt          i,j,k,count,*rows;
2066   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2067   PetscScalar       zero = 0.0;
2068   MatScalar         *aa;
2069   const PetscScalar *xx;
2070   PetscScalar       *bb;
2071 
2072   PetscFunctionBegin;
2073   /* fix right hand side if needed */
2074   if (x && b) {
2075     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2076     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2077     for (i=0; i<is_n; i++) {
2078       bb[is_idx[i]] = diag*xx[is_idx[i]];
2079     }
2080     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2081     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2082   }
2083 
2084   /* Make a copy of the IS and  sort it */
2085   /* allocate memory for rows,sizes */
2086   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2087 
2088   /* copy IS values to rows, and sort them */
2089   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2090   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2091 
2092   if (baij->keepnonzeropattern) {
2093     for (i=0; i<is_n; i++) sizes[i] = 1;
2094     bs_max          = is_n;
2095   } else {
2096     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2097     A->nonzerostate++;
2098   }
2099 
2100   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2101     row = rows[j];
2102     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2103     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2104     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2105     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2106       if (diag != (PetscScalar)0.0) {
2107         if (baij->ilen[row/bs] > 0) {
2108           baij->ilen[row/bs]       = 1;
2109           baij->j[baij->i[row/bs]] = row/bs;
2110 
2111           ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr);
2112         }
2113         /* Now insert all the diagonal values for this bs */
2114         for (k=0; k<bs; k++) {
2115           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2116         }
2117       } else { /* (diag == 0.0) */
2118         baij->ilen[row/bs] = 0;
2119       } /* end (diag == 0.0) */
2120     } else { /* (sizes[i] != bs) */
2121       if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2122       for (k=0; k<count; k++) {
2123         aa[0] =  zero;
2124         aa   += bs;
2125       }
2126       if (diag != (PetscScalar)0.0) {
2127         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2128       }
2129     }
2130   }
2131 
2132   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2133   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2134   PetscFunctionReturn(0);
2135 }
2136 
2137 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2138 {
2139   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2140   PetscErrorCode    ierr;
2141   PetscInt          i,j,k,count;
2142   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2143   PetscScalar       zero = 0.0;
2144   MatScalar         *aa;
2145   const PetscScalar *xx;
2146   PetscScalar       *bb;
2147   PetscBool         *zeroed,vecs = PETSC_FALSE;
2148 
2149   PetscFunctionBegin;
2150   /* fix right hand side if needed */
2151   if (x && b) {
2152     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2153     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2154     vecs = PETSC_TRUE;
2155   }
2156 
2157   /* zero the columns */
2158   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2159   for (i=0; i<is_n; i++) {
2160     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2161     zeroed[is_idx[i]] = PETSC_TRUE;
2162   }
2163   for (i=0; i<A->rmap->N; i++) {
2164     if (!zeroed[i]) {
2165       row = i/bs;
2166       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2167         for (k=0; k<bs; k++) {
2168           col = bs*baij->j[j] + k;
2169           if (zeroed[col]) {
2170             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2171             if (vecs) bb[i] -= aa[0]*xx[col];
2172             aa[0] = 0.0;
2173           }
2174         }
2175       }
2176     } else if (vecs) bb[i] = diag*xx[i];
2177   }
2178   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2179   if (vecs) {
2180     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2181     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2182   }
2183 
2184   /* zero the rows */
2185   for (i=0; i<is_n; i++) {
2186     row   = is_idx[i];
2187     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2188     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2189     for (k=0; k<count; k++) {
2190       aa[0] =  zero;
2191       aa   += bs;
2192     }
2193     if (diag != (PetscScalar)0.0) {
2194       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2195     }
2196   }
2197   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2198   PetscFunctionReturn(0);
2199 }
2200 
2201 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2202 {
2203   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2204   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2205   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2206   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2207   PetscErrorCode ierr;
2208   PetscInt       ridx,cidx,bs2=a->bs2;
2209   PetscBool      roworiented=a->roworiented;
2210   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2211 
2212   PetscFunctionBegin;
2213   for (k=0; k<m; k++) { /* loop over added rows */
2214     row  = im[k];
2215     brow = row/bs;
2216     if (row < 0) continue;
2217     if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2218     rp   = aj + ai[brow];
2219     if (!A->structure_only) ap = aa + bs2*ai[brow];
2220     rmax = imax[brow];
2221     nrow = ailen[brow];
2222     low  = 0;
2223     high = nrow;
2224     for (l=0; l<n; l++) { /* loop over added columns */
2225       if (in[l] < 0) continue;
2226       if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2227       col  = in[l]; bcol = col/bs;
2228       ridx = row % bs; cidx = col % bs;
2229       if (!A->structure_only) {
2230         if (roworiented) {
2231           value = v[l + k*n];
2232         } else {
2233           value = v[k + l*m];
2234         }
2235       }
2236       if (col <= lastcol) low = 0; else high = nrow;
2237       lastcol = col;
2238       while (high-low > 7) {
2239         t = (low+high)/2;
2240         if (rp[t] > bcol) high = t;
2241         else              low  = t;
2242       }
2243       for (i=low; i<high; i++) {
2244         if (rp[i] > bcol) break;
2245         if (rp[i] == bcol) {
2246           bap = ap +  bs2*i + bs*cidx + ridx;
2247           if (!A->structure_only) {
2248             if (is == ADD_VALUES) *bap += value;
2249             else                  *bap  = value;
2250           }
2251           goto noinsert1;
2252         }
2253       }
2254       if (nonew == 1) goto noinsert1;
2255       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2256       if (A->structure_only) {
2257         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2258       } else {
2259         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2260       }
2261       N = nrow++ - 1; high++;
2262       /* shift up all the later entries in this row */
2263       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
2264       rp[i] = bcol;
2265       if (!A->structure_only) {
2266         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
2267         ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr);
2268         ap[bs2*i + bs*cidx + ridx] = value;
2269       }
2270       a->nz++;
2271       A->nonzerostate++;
2272 noinsert1:;
2273       low = i;
2274     }
2275     ailen[brow] = nrow;
2276   }
2277   PetscFunctionReturn(0);
2278 }
2279 
2280 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2281 {
2282   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2283   Mat            outA;
2284   PetscErrorCode ierr;
2285   PetscBool      row_identity,col_identity;
2286 
2287   PetscFunctionBegin;
2288   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2289   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2290   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2291   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2292 
2293   outA            = inA;
2294   inA->factortype = MAT_FACTOR_LU;
2295   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2296   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2297 
2298   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2299 
2300   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2301   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2302   a->row = row;
2303   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2304   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2305   a->col = col;
2306 
2307   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2308   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2309   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2310   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2311 
2312   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2313   if (!a->solve_work) {
2314     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2315     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2316   }
2317   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2322 {
2323   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2324   PetscInt    i,nz,mbs;
2325 
2326   PetscFunctionBegin;
2327   nz  = baij->maxnz;
2328   mbs = baij->mbs;
2329   for (i=0; i<nz; i++) {
2330     baij->j[i] = indices[i];
2331   }
2332   baij->nz = nz;
2333   for (i=0; i<mbs; i++) {
2334     baij->ilen[i] = baij->imax[i];
2335   }
2336   PetscFunctionReturn(0);
2337 }
2338 
2339 /*@
2340     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2341        in the matrix.
2342 
2343   Input Parameters:
2344 +  mat - the SeqBAIJ matrix
2345 -  indices - the column indices
2346 
2347   Level: advanced
2348 
2349   Notes:
2350     This can be called if you have precomputed the nonzero structure of the
2351   matrix and want to provide it to the matrix object to improve the performance
2352   of the MatSetValues() operation.
2353 
2354     You MUST have set the correct numbers of nonzeros per row in the call to
2355   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2356 
2357     MUST be called before any calls to MatSetValues();
2358 
2359 @*/
2360 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2361 {
2362   PetscErrorCode ierr;
2363 
2364   PetscFunctionBegin;
2365   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2366   PetscValidPointer(indices,2);
2367   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2368   PetscFunctionReturn(0);
2369 }
2370 
2371 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2372 {
2373   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2374   PetscErrorCode ierr;
2375   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2376   PetscReal      atmp;
2377   PetscScalar    *x,zero = 0.0;
2378   MatScalar      *aa;
2379   PetscInt       ncols,brow,krow,kcol;
2380 
2381   PetscFunctionBegin;
2382   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2383   bs  = A->rmap->bs;
2384   aa  = a->a;
2385   ai  = a->i;
2386   aj  = a->j;
2387   mbs = a->mbs;
2388 
2389   ierr = VecSet(v,zero);CHKERRQ(ierr);
2390   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2391   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2392   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2393   for (i=0; i<mbs; i++) {
2394     ncols = ai[1] - ai[0]; ai++;
2395     brow  = bs*i;
2396     for (j=0; j<ncols; j++) {
2397       for (kcol=0; kcol<bs; kcol++) {
2398         for (krow=0; krow<bs; krow++) {
2399           atmp = PetscAbsScalar(*aa);aa++;
2400           row  = brow + krow;   /* row index */
2401           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2402         }
2403       }
2404       aj++;
2405     }
2406   }
2407   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2408   PetscFunctionReturn(0);
2409 }
2410 
2411 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2412 {
2413   PetscErrorCode ierr;
2414 
2415   PetscFunctionBegin;
2416   /* If the two matrices have the same copy implementation, use fast copy. */
2417   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2418     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2419     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2420     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2421 
2422     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2423     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2424     ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr);
2425     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2426   } else {
2427     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2428   }
2429   PetscFunctionReturn(0);
2430 }
2431 
2432 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2433 {
2434   PetscErrorCode ierr;
2435 
2436   PetscFunctionBegin;
2437   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2438   PetscFunctionReturn(0);
2439 }
2440 
2441 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2442 {
2443   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2444 
2445   PetscFunctionBegin;
2446   *array = a->a;
2447   PetscFunctionReturn(0);
2448 }
2449 
2450 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2451 {
2452   PetscFunctionBegin;
2453   *array = NULL;
2454   PetscFunctionReturn(0);
2455 }
2456 
2457 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2458 {
2459   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2460   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2461   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2462   PetscErrorCode ierr;
2463 
2464   PetscFunctionBegin;
2465   /* Set the number of nonzeros in the new matrix */
2466   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2471 {
2472   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2473   PetscErrorCode ierr;
2474   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2475   PetscBLASInt   one=1;
2476 
2477   PetscFunctionBegin;
2478   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2479     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2480     if (e) {
2481       ierr = PetscArraycmp(x->i,y->i,x->mbs+1,&e);CHKERRQ(ierr);
2482       if (e) {
2483         ierr = PetscArraycmp(x->j,y->j,x->i[x->mbs],&e);CHKERRQ(ierr);
2484         if (e) str = SAME_NONZERO_PATTERN;
2485       }
2486     }
2487     if (!e && str == SAME_NONZERO_PATTERN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"MatStructure is not SAME_NONZERO_PATTERN");
2488   }
2489   if (str == SAME_NONZERO_PATTERN) {
2490     PetscScalar  alpha = a;
2491     PetscBLASInt bnz;
2492     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2493     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2494     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2495   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2496     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2497   } else {
2498     Mat      B;
2499     PetscInt *nnz;
2500     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2501     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2502     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2503     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2504     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2505     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2506     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2507     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2508     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2509     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2510     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2511     ierr = PetscFree(nnz);CHKERRQ(ierr);
2512   }
2513   PetscFunctionReturn(0);
2514 }
2515 
2516 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2517 {
2518 #if defined(PETSC_USE_COMPLEX)
2519   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2520   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2521   MatScalar   *aa = a->a;
2522 
2523   PetscFunctionBegin;
2524   for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]);
2525 #else
2526   PetscFunctionBegin;
2527 #endif
2528   PetscFunctionReturn(0);
2529 }
2530 
2531 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2532 {
2533   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2534   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2535   MatScalar   *aa = a->a;
2536 
2537   PetscFunctionBegin;
2538   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2539   PetscFunctionReturn(0);
2540 }
2541 
2542 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2543 {
2544   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2545   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2546   MatScalar   *aa = a->a;
2547 
2548   PetscFunctionBegin;
2549   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2550   PetscFunctionReturn(0);
2551 }
2552 
2553 /*
2554     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2555 */
2556 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2557 {
2558   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2559   PetscErrorCode ierr;
2560   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2561   PetscInt       nz = a->i[m],row,*jj,mr,col;
2562 
2563   PetscFunctionBegin;
2564   *nn = n;
2565   if (!ia) PetscFunctionReturn(0);
2566   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2567   else {
2568     ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2569     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2570     ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2571     jj   = a->j;
2572     for (i=0; i<nz; i++) {
2573       collengths[jj[i]]++;
2574     }
2575     cia[0] = oshift;
2576     for (i=0; i<n; i++) {
2577       cia[i+1] = cia[i] + collengths[i];
2578     }
2579     ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2580     jj   = a->j;
2581     for (row=0; row<m; row++) {
2582       mr = a->i[row+1] - a->i[row];
2583       for (i=0; i<mr; i++) {
2584         col = *jj++;
2585 
2586         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2587       }
2588     }
2589     ierr = PetscFree(collengths);CHKERRQ(ierr);
2590     *ia  = cia; *ja = cja;
2591   }
2592   PetscFunctionReturn(0);
2593 }
2594 
2595 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2596 {
2597   PetscErrorCode ierr;
2598 
2599   PetscFunctionBegin;
2600   if (!ia) PetscFunctionReturn(0);
2601   ierr = PetscFree(*ia);CHKERRQ(ierr);
2602   ierr = PetscFree(*ja);CHKERRQ(ierr);
2603   PetscFunctionReturn(0);
2604 }
2605 
2606 /*
2607  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2608  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2609  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2610  */
2611 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2612 {
2613   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2614   PetscErrorCode ierr;
2615   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2616   PetscInt       nz = a->i[m],row,*jj,mr,col;
2617   PetscInt       *cspidx;
2618 
2619   PetscFunctionBegin;
2620   *nn = n;
2621   if (!ia) PetscFunctionReturn(0);
2622 
2623   ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2624   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2625   ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2626   ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr);
2627   jj   = a->j;
2628   for (i=0; i<nz; i++) {
2629     collengths[jj[i]]++;
2630   }
2631   cia[0] = oshift;
2632   for (i=0; i<n; i++) {
2633     cia[i+1] = cia[i] + collengths[i];
2634   }
2635   ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2636   jj   = a->j;
2637   for (row=0; row<m; row++) {
2638     mr = a->i[row+1] - a->i[row];
2639     for (i=0; i<mr; i++) {
2640       col = *jj++;
2641       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2642       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2643     }
2644   }
2645   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2646   *ia    = cia;
2647   *ja    = cja;
2648   *spidx = cspidx;
2649   PetscFunctionReturn(0);
2650 }
2651 
2652 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2653 {
2654   PetscErrorCode ierr;
2655 
2656   PetscFunctionBegin;
2657   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2658   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2659   PetscFunctionReturn(0);
2660 }
2661 
2662 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2663 {
2664   PetscErrorCode ierr;
2665   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2666 
2667   PetscFunctionBegin;
2668   if (!Y->preallocated || !aij->nz) {
2669     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2670   }
2671   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2672   PetscFunctionReturn(0);
2673 }
2674 
2675 /* -------------------------------------------------------------------*/
2676 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2677                                        MatGetRow_SeqBAIJ,
2678                                        MatRestoreRow_SeqBAIJ,
2679                                        MatMult_SeqBAIJ_N,
2680                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2681                                        MatMultTranspose_SeqBAIJ,
2682                                        MatMultTransposeAdd_SeqBAIJ,
2683                                        NULL,
2684                                        NULL,
2685                                        NULL,
2686                                /* 10*/ NULL,
2687                                        MatLUFactor_SeqBAIJ,
2688                                        NULL,
2689                                        NULL,
2690                                        MatTranspose_SeqBAIJ,
2691                                /* 15*/ MatGetInfo_SeqBAIJ,
2692                                        MatEqual_SeqBAIJ,
2693                                        MatGetDiagonal_SeqBAIJ,
2694                                        MatDiagonalScale_SeqBAIJ,
2695                                        MatNorm_SeqBAIJ,
2696                                /* 20*/ NULL,
2697                                        MatAssemblyEnd_SeqBAIJ,
2698                                        MatSetOption_SeqBAIJ,
2699                                        MatZeroEntries_SeqBAIJ,
2700                                /* 24*/ MatZeroRows_SeqBAIJ,
2701                                        NULL,
2702                                        NULL,
2703                                        NULL,
2704                                        NULL,
2705                                /* 29*/ MatSetUp_SeqBAIJ,
2706                                        NULL,
2707                                        NULL,
2708                                        NULL,
2709                                        NULL,
2710                                /* 34*/ MatDuplicate_SeqBAIJ,
2711                                        NULL,
2712                                        NULL,
2713                                        MatILUFactor_SeqBAIJ,
2714                                        NULL,
2715                                /* 39*/ MatAXPY_SeqBAIJ,
2716                                        MatCreateSubMatrices_SeqBAIJ,
2717                                        MatIncreaseOverlap_SeqBAIJ,
2718                                        MatGetValues_SeqBAIJ,
2719                                        MatCopy_SeqBAIJ,
2720                                /* 44*/ NULL,
2721                                        MatScale_SeqBAIJ,
2722                                        MatShift_SeqBAIJ,
2723                                        NULL,
2724                                        MatZeroRowsColumns_SeqBAIJ,
2725                                /* 49*/ NULL,
2726                                        MatGetRowIJ_SeqBAIJ,
2727                                        MatRestoreRowIJ_SeqBAIJ,
2728                                        MatGetColumnIJ_SeqBAIJ,
2729                                        MatRestoreColumnIJ_SeqBAIJ,
2730                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2731                                        NULL,
2732                                        NULL,
2733                                        NULL,
2734                                        MatSetValuesBlocked_SeqBAIJ,
2735                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2736                                        MatDestroy_SeqBAIJ,
2737                                        MatView_SeqBAIJ,
2738                                        NULL,
2739                                        NULL,
2740                                /* 64*/ NULL,
2741                                        NULL,
2742                                        NULL,
2743                                        NULL,
2744                                        NULL,
2745                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2746                                        NULL,
2747                                        MatConvert_Basic,
2748                                        NULL,
2749                                        NULL,
2750                                /* 74*/ NULL,
2751                                        MatFDColoringApply_BAIJ,
2752                                        NULL,
2753                                        NULL,
2754                                        NULL,
2755                                /* 79*/ NULL,
2756                                        NULL,
2757                                        NULL,
2758                                        NULL,
2759                                        MatLoad_SeqBAIJ,
2760                                /* 84*/ NULL,
2761                                        NULL,
2762                                        NULL,
2763                                        NULL,
2764                                        NULL,
2765                                /* 89*/ NULL,
2766                                        NULL,
2767                                        NULL,
2768                                        NULL,
2769                                        NULL,
2770                                /* 94*/ NULL,
2771                                        NULL,
2772                                        NULL,
2773                                        NULL,
2774                                        NULL,
2775                                /* 99*/ NULL,
2776                                        NULL,
2777                                        NULL,
2778                                        MatConjugate_SeqBAIJ,
2779                                        NULL,
2780                                /*104*/ NULL,
2781                                        MatRealPart_SeqBAIJ,
2782                                        MatImaginaryPart_SeqBAIJ,
2783                                        NULL,
2784                                        NULL,
2785                                /*109*/ NULL,
2786                                        NULL,
2787                                        NULL,
2788                                        NULL,
2789                                        MatMissingDiagonal_SeqBAIJ,
2790                                /*114*/ NULL,
2791                                        NULL,
2792                                        NULL,
2793                                        NULL,
2794                                        NULL,
2795                                /*119*/ NULL,
2796                                        NULL,
2797                                        MatMultHermitianTranspose_SeqBAIJ,
2798                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2799                                        NULL,
2800                                /*124*/ NULL,
2801                                        MatGetColumnReductions_SeqBAIJ,
2802                                        MatInvertBlockDiagonal_SeqBAIJ,
2803                                        NULL,
2804                                        NULL,
2805                                /*129*/ NULL,
2806                                        NULL,
2807                                        NULL,
2808                                        NULL,
2809                                        NULL,
2810                                /*134*/ NULL,
2811                                        NULL,
2812                                        NULL,
2813                                        NULL,
2814                                        NULL,
2815                                /*139*/ MatSetBlockSizes_Default,
2816                                        NULL,
2817                                        NULL,
2818                                        MatFDColoringSetUp_SeqXAIJ,
2819                                        NULL,
2820                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2821                                        MatDestroySubMatrices_SeqBAIJ
2822 };
2823 
2824 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2825 {
2826   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2827   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2828   PetscErrorCode ierr;
2829 
2830   PetscFunctionBegin;
2831   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2832 
2833   /* allocate space for values if not already there */
2834   if (!aij->saved_values) {
2835     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2836     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2837   }
2838 
2839   /* copy values over */
2840   ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr);
2841   PetscFunctionReturn(0);
2842 }
2843 
2844 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2845 {
2846   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2847   PetscErrorCode ierr;
2848   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2849 
2850   PetscFunctionBegin;
2851   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2852   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2853 
2854   /* copy values over */
2855   ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr);
2856   PetscFunctionReturn(0);
2857 }
2858 
2859 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2860 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2861 
2862 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2863 {
2864   Mat_SeqBAIJ    *b;
2865   PetscErrorCode ierr;
2866   PetscInt       i,mbs,nbs,bs2;
2867   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2868 
2869   PetscFunctionBegin;
2870   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2871   if (nz == MAT_SKIP_ALLOCATION) {
2872     skipallocation = PETSC_TRUE;
2873     nz             = 0;
2874   }
2875 
2876   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2877   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2878   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2879   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2880 
2881   B->preallocated = PETSC_TRUE;
2882 
2883   mbs = B->rmap->n/bs;
2884   nbs = B->cmap->n/bs;
2885   bs2 = bs*bs;
2886 
2887   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2888 
2889   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2890   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2891   if (nnz) {
2892     for (i=0; i<mbs; i++) {
2893       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2894       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2895     }
2896   }
2897 
2898   b    = (Mat_SeqBAIJ*)B->data;
2899   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2900   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2901   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2902 
2903   if (!flg) {
2904     switch (bs) {
2905     case 1:
2906       B->ops->mult    = MatMult_SeqBAIJ_1;
2907       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2908       break;
2909     case 2:
2910       B->ops->mult    = MatMult_SeqBAIJ_2;
2911       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2912       break;
2913     case 3:
2914       B->ops->mult    = MatMult_SeqBAIJ_3;
2915       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2916       break;
2917     case 4:
2918       B->ops->mult    = MatMult_SeqBAIJ_4;
2919       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2920       break;
2921     case 5:
2922       B->ops->mult    = MatMult_SeqBAIJ_5;
2923       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2924       break;
2925     case 6:
2926       B->ops->mult    = MatMult_SeqBAIJ_6;
2927       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2928       break;
2929     case 7:
2930       B->ops->mult    = MatMult_SeqBAIJ_7;
2931       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2932       break;
2933     case 9:
2934     {
2935       PetscInt version = 1;
2936       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
2937       switch (version) {
2938 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2939       case 1:
2940         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2941         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2942         ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2943         break;
2944 #endif
2945       default:
2946         B->ops->mult    = MatMult_SeqBAIJ_N;
2947         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2948         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2949         break;
2950       }
2951       break;
2952     }
2953     case 11:
2954       B->ops->mult    = MatMult_SeqBAIJ_11;
2955       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2956       break;
2957     case 12:
2958     {
2959       PetscInt version = 1;
2960       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
2961       switch (version) {
2962       case 1:
2963         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
2964         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2965         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2966         break;
2967       case 2:
2968         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
2969         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
2970         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2971         break;
2972 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2973       case 3:
2974         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
2975         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2976         ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2977         break;
2978 #endif
2979       default:
2980         B->ops->mult    = MatMult_SeqBAIJ_N;
2981         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2982         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2983         break;
2984       }
2985       break;
2986     }
2987     case 15:
2988     {
2989       PetscInt version = 1;
2990       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
2991       switch (version) {
2992       case 1:
2993         B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2994         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2995         break;
2996       case 2:
2997         B->ops->mult    = MatMult_SeqBAIJ_15_ver2;
2998         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2999         break;
3000       case 3:
3001         B->ops->mult    = MatMult_SeqBAIJ_15_ver3;
3002         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
3003         break;
3004       case 4:
3005         B->ops->mult    = MatMult_SeqBAIJ_15_ver4;
3006         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
3007         break;
3008       default:
3009         B->ops->mult    = MatMult_SeqBAIJ_N;
3010         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
3011         break;
3012       }
3013       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3014       break;
3015     }
3016     default:
3017       B->ops->mult    = MatMult_SeqBAIJ_N;
3018       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3019       ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
3020       break;
3021     }
3022   }
3023   B->ops->sor = MatSOR_SeqBAIJ;
3024   b->mbs = mbs;
3025   b->nbs = nbs;
3026   if (!skipallocation) {
3027     if (!b->imax) {
3028       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
3029       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3030 
3031       b->free_imax_ilen = PETSC_TRUE;
3032     }
3033     /* b->ilen will count nonzeros in each block row so far. */
3034     for (i=0; i<mbs; i++) b->ilen[i] = 0;
3035     if (!nnz) {
3036       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3037       else if (nz < 0) nz = 1;
3038       nz = PetscMin(nz,nbs);
3039       for (i=0; i<mbs; i++) b->imax[i] = nz;
3040       ierr = PetscIntMultError(nz,mbs,&nz);CHKERRQ(ierr);
3041     } else {
3042       PetscInt64 nz64 = 0;
3043       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
3044       ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr);
3045     }
3046 
3047     /* allocate the matrix space */
3048     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3049     if (B->structure_only) {
3050       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
3051       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
3052       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
3053     } else {
3054       PetscInt nzbs2 = 0;
3055       ierr = PetscIntMultError(nz,bs2,&nzbs2);CHKERRQ(ierr);
3056       ierr = PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
3057       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3058       ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr);
3059     }
3060     ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr);
3061 
3062     if (B->structure_only) {
3063       b->singlemalloc = PETSC_FALSE;
3064       b->free_a       = PETSC_FALSE;
3065     } else {
3066       b->singlemalloc = PETSC_TRUE;
3067       b->free_a       = PETSC_TRUE;
3068     }
3069     b->free_ij = PETSC_TRUE;
3070 
3071     b->i[0] = 0;
3072     for (i=1; i<mbs+1; i++) {
3073       b->i[i] = b->i[i-1] + b->imax[i-1];
3074     }
3075 
3076   } else {
3077     b->free_a  = PETSC_FALSE;
3078     b->free_ij = PETSC_FALSE;
3079   }
3080 
3081   b->bs2              = bs2;
3082   b->mbs              = mbs;
3083   b->nz               = 0;
3084   b->maxnz            = nz;
3085   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3086   B->was_assembled    = PETSC_FALSE;
3087   B->assembled        = PETSC_FALSE;
3088   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3089   PetscFunctionReturn(0);
3090 }
3091 
3092 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3093 {
3094   PetscInt       i,m,nz,nz_max=0,*nnz;
3095   PetscScalar    *values=NULL;
3096   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
3097   PetscErrorCode ierr;
3098 
3099   PetscFunctionBegin;
3100   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3101   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3102   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3103   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3104   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3105   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3106   m    = B->rmap->n/bs;
3107 
3108   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3109   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
3110   for (i=0; i<m; i++) {
3111     nz = ii[i+1]- ii[i];
3112     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3113     nz_max = PetscMax(nz_max, nz);
3114     nnz[i] = nz;
3115   }
3116   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3117   ierr = PetscFree(nnz);CHKERRQ(ierr);
3118 
3119   values = (PetscScalar*)V;
3120   if (!values) {
3121     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
3122   }
3123   for (i=0; i<m; i++) {
3124     PetscInt          ncols  = ii[i+1] - ii[i];
3125     const PetscInt    *icols = jj + ii[i];
3126     if (bs == 1 || !roworiented) {
3127       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3128       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3129     } else {
3130       PetscInt j;
3131       for (j=0; j<ncols; j++) {
3132         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3133         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
3134       }
3135     }
3136   }
3137   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3138   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3139   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3140   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3141   PetscFunctionReturn(0);
3142 }
3143 
3144 /*@C
3145    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
3146 
3147    Not Collective
3148 
3149    Input Parameter:
3150 .  mat - a MATSEQBAIJ matrix
3151 
3152    Output Parameter:
3153 .   array - pointer to the data
3154 
3155    Level: intermediate
3156 
3157 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3158 @*/
3159 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3160 {
3161   PetscErrorCode ierr;
3162 
3163   PetscFunctionBegin;
3164   ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3165   PetscFunctionReturn(0);
3166 }
3167 
3168 /*@C
3169    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3170 
3171    Not Collective
3172 
3173    Input Parameters:
3174 +  mat - a MATSEQBAIJ matrix
3175 -  array - pointer to the data
3176 
3177    Level: intermediate
3178 
3179 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3180 @*/
3181 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3182 {
3183   PetscErrorCode ierr;
3184 
3185   PetscFunctionBegin;
3186   ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3187   PetscFunctionReturn(0);
3188 }
3189 
3190 /*MC
3191    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3192    block sparse compressed row format.
3193 
3194    Options Database Keys:
3195 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3196 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3197 
3198    Level: beginner
3199 
3200    Notes:
3201     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3202     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
3203 
3204    Run with -info to see what version of the matrix-vector product is being used
3205 
3206 .seealso: MatCreateSeqBAIJ()
3207 M*/
3208 
3209 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3210 
3211 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3212 {
3213   PetscErrorCode ierr;
3214   PetscMPIInt    size;
3215   Mat_SeqBAIJ    *b;
3216 
3217   PetscFunctionBegin;
3218   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
3219   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3220 
3221   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3222   B->data = (void*)b;
3223   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3224 
3225   b->row          = NULL;
3226   b->col          = NULL;
3227   b->icol         = NULL;
3228   b->reallocs     = 0;
3229   b->saved_values = NULL;
3230 
3231   b->roworiented        = PETSC_TRUE;
3232   b->nonew              = 0;
3233   b->diag               = NULL;
3234   B->spptr              = NULL;
3235   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3236   b->keepnonzeropattern = PETSC_FALSE;
3237 
3238   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr);
3239   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr);
3240   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3241   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3242   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3243   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3244   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3245   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3246   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3247   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3248 #if defined(PETSC_HAVE_HYPRE)
3249   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3250 #endif
3251   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3252   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3253   PetscFunctionReturn(0);
3254 }
3255 
3256 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3257 {
3258   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3259   PetscErrorCode ierr;
3260   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3261 
3262   PetscFunctionBegin;
3263   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3264 
3265   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3266     c->imax           = a->imax;
3267     c->ilen           = a->ilen;
3268     c->free_imax_ilen = PETSC_FALSE;
3269   } else {
3270     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3271     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3272     for (i=0; i<mbs; i++) {
3273       c->imax[i] = a->imax[i];
3274       c->ilen[i] = a->ilen[i];
3275     }
3276     c->free_imax_ilen = PETSC_TRUE;
3277   }
3278 
3279   /* allocate the matrix space */
3280   if (mallocmatspace) {
3281     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3282       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3283       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3284 
3285       c->i            = a->i;
3286       c->j            = a->j;
3287       c->singlemalloc = PETSC_FALSE;
3288       c->free_a       = PETSC_TRUE;
3289       c->free_ij      = PETSC_FALSE;
3290       c->parent       = A;
3291       C->preallocated = PETSC_TRUE;
3292       C->assembled    = PETSC_TRUE;
3293 
3294       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3295       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3296       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3297     } else {
3298       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3299       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3300 
3301       c->singlemalloc = PETSC_TRUE;
3302       c->free_a       = PETSC_TRUE;
3303       c->free_ij      = PETSC_TRUE;
3304 
3305       ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr);
3306       if (mbs > 0) {
3307         ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr);
3308         if (cpvalues == MAT_COPY_VALUES) {
3309           ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr);
3310         } else {
3311           ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr);
3312         }
3313       }
3314       C->preallocated = PETSC_TRUE;
3315       C->assembled    = PETSC_TRUE;
3316     }
3317   }
3318 
3319   c->roworiented = a->roworiented;
3320   c->nonew       = a->nonew;
3321 
3322   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3323   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3324 
3325   c->bs2         = a->bs2;
3326   c->mbs         = a->mbs;
3327   c->nbs         = a->nbs;
3328 
3329   if (a->diag) {
3330     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3331       c->diag      = a->diag;
3332       c->free_diag = PETSC_FALSE;
3333     } else {
3334       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3335       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3336       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3337       c->free_diag = PETSC_TRUE;
3338     }
3339   } else c->diag = NULL;
3340 
3341   c->nz         = a->nz;
3342   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3343   c->solve_work = NULL;
3344   c->mult_work  = NULL;
3345   c->sor_workt  = NULL;
3346   c->sor_work   = NULL;
3347 
3348   c->compressedrow.use   = a->compressedrow.use;
3349   c->compressedrow.nrows = a->compressedrow.nrows;
3350   if (a->compressedrow.use) {
3351     i    = a->compressedrow.nrows;
3352     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3353     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3354     ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr);
3355     ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr);
3356   } else {
3357     c->compressedrow.use    = PETSC_FALSE;
3358     c->compressedrow.i      = NULL;
3359     c->compressedrow.rindex = NULL;
3360   }
3361   C->nonzerostate = A->nonzerostate;
3362 
3363   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3364   PetscFunctionReturn(0);
3365 }
3366 
3367 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3368 {
3369   PetscErrorCode ierr;
3370 
3371   PetscFunctionBegin;
3372   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3373   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3374   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3375   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3376   PetscFunctionReturn(0);
3377 }
3378 
3379 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3380 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3381 {
3382   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3383   PetscInt       *rowidxs,*colidxs;
3384   PetscScalar    *matvals;
3385   PetscErrorCode ierr;
3386 
3387   PetscFunctionBegin;
3388   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3389 
3390   /* read matrix header */
3391   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3392   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3393   M = header[1]; N = header[2]; nz = header[3];
3394   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3395   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3396   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ");
3397 
3398   /* set block sizes from the viewer's .info file */
3399   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3400   /* set local and global sizes if not set already */
3401   if (mat->rmap->n < 0) mat->rmap->n = M;
3402   if (mat->cmap->n < 0) mat->cmap->n = N;
3403   if (mat->rmap->N < 0) mat->rmap->N = M;
3404   if (mat->cmap->N < 0) mat->cmap->N = N;
3405   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3406   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3407 
3408   /* check if the matrix sizes are correct */
3409   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3410   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3411   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3412   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3413   mbs = m/bs; nbs = n/bs;
3414 
3415   /* read in row lengths, column indices and nonzero values */
3416   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3417   ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr);
3418   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3419   sum = rowidxs[m];
3420   if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3421 
3422   /* read in column indices and nonzero values */
3423   ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr);
3424   ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr);
3425   ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr);
3426 
3427   { /* preallocate matrix storage */
3428     PetscBT   bt; /* helper bit set to count nonzeros */
3429     PetscInt  *nnz;
3430     PetscBool sbaij;
3431 
3432     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3433     ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr);
3434     ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr);
3435     for (i=0; i<mbs; i++) {
3436       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3437       for (k=0; k<bs; k++) {
3438         PetscInt row = bs*i + k;
3439         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3440           PetscInt col = colidxs[j];
3441           if (!sbaij || col >= row)
3442             if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3443         }
3444       }
3445     }
3446     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3447     ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3448     ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3449     ierr = PetscFree(nnz);CHKERRQ(ierr);
3450   }
3451 
3452   /* store matrix values */
3453   for (i=0; i<m; i++) {
3454     PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3455     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3456   }
3457 
3458   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3459   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3460   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3461   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3462   PetscFunctionReturn(0);
3463 }
3464 
3465 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3466 {
3467   PetscErrorCode ierr;
3468   PetscBool      isbinary;
3469 
3470   PetscFunctionBegin;
3471   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3472   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3473   ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3474   PetscFunctionReturn(0);
3475 }
3476 
3477 /*@C
3478    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3479    compressed row) format.  For good matrix assembly performance the
3480    user should preallocate the matrix storage by setting the parameter nz
3481    (or the array nnz).  By setting these parameters accurately, performance
3482    during matrix assembly can be increased by more than a factor of 50.
3483 
3484    Collective
3485 
3486    Input Parameters:
3487 +  comm - MPI communicator, set to PETSC_COMM_SELF
3488 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3489           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3490 .  m - number of rows
3491 .  n - number of columns
3492 .  nz - number of nonzero blocks  per block row (same for all rows)
3493 -  nnz - array containing the number of nonzero blocks in the various block rows
3494          (possibly different for each block row) or NULL
3495 
3496    Output Parameter:
3497 .  A - the matrix
3498 
3499    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3500    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3501    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3502 
3503    Options Database Keys:
3504 +   -mat_no_unroll - uses code that does not unroll the loops in the
3505                      block calculations (much slower)
3506 -    -mat_block_size - size of the blocks to use
3507 
3508    Level: intermediate
3509 
3510    Notes:
3511    The number of rows and columns must be divisible by blocksize.
3512 
3513    If the nnz parameter is given then the nz parameter is ignored
3514 
3515    A nonzero block is any block that as 1 or more nonzeros in it
3516 
3517    The block AIJ format is fully compatible with standard Fortran 77
3518    storage.  That is, the stored row and column indices can begin at
3519    either one (as in Fortran) or zero.  See the users' manual for details.
3520 
3521    Specify the preallocated storage with either nz or nnz (not both).
3522    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3523    allocation.  See Users-Manual: ch_mat for details.
3524    matrices.
3525 
3526 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3527 @*/
3528 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3529 {
3530   PetscErrorCode ierr;
3531 
3532   PetscFunctionBegin;
3533   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3534   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3535   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3536   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3537   PetscFunctionReturn(0);
3538 }
3539 
3540 /*@C
3541    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3542    per row in the matrix. For good matrix assembly performance the
3543    user should preallocate the matrix storage by setting the parameter nz
3544    (or the array nnz).  By setting these parameters accurately, performance
3545    during matrix assembly can be increased by more than a factor of 50.
3546 
3547    Collective
3548 
3549    Input Parameters:
3550 +  B - the matrix
3551 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3552           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3553 .  nz - number of block nonzeros per block row (same for all rows)
3554 -  nnz - array containing the number of block nonzeros in the various block rows
3555          (possibly different for each block row) or NULL
3556 
3557    Options Database Keys:
3558 +   -mat_no_unroll - uses code that does not unroll the loops in the
3559                      block calculations (much slower)
3560 -   -mat_block_size - size of the blocks to use
3561 
3562    Level: intermediate
3563 
3564    Notes:
3565    If the nnz parameter is given then the nz parameter is ignored
3566 
3567    You can call MatGetInfo() to get information on how effective the preallocation was;
3568    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3569    You can also run with the option -info and look for messages with the string
3570    malloc in them to see if additional memory allocation was needed.
3571 
3572    The block AIJ format is fully compatible with standard Fortran 77
3573    storage.  That is, the stored row and column indices can begin at
3574    either one (as in Fortran) or zero.  See the users' manual for details.
3575 
3576    Specify the preallocated storage with either nz or nnz (not both).
3577    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3578    allocation.  See Users-Manual: ch_mat for details.
3579 
3580 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3581 @*/
3582 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3583 {
3584   PetscErrorCode ierr;
3585 
3586   PetscFunctionBegin;
3587   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3588   PetscValidType(B,1);
3589   PetscValidLogicalCollectiveInt(B,bs,2);
3590   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3591   PetscFunctionReturn(0);
3592 }
3593 
3594 /*@C
3595    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3596 
3597    Collective
3598 
3599    Input Parameters:
3600 +  B - the matrix
3601 .  i - the indices into j for the start of each local row (starts with zero)
3602 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3603 -  v - optional values in the matrix
3604 
3605    Level: advanced
3606 
3607    Notes:
3608    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3609    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3610    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3611    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3612    block column and the second index is over columns within a block.
3613 
3614    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3615 
3616 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3617 @*/
3618 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3619 {
3620   PetscErrorCode ierr;
3621 
3622   PetscFunctionBegin;
3623   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3624   PetscValidType(B,1);
3625   PetscValidLogicalCollectiveInt(B,bs,2);
3626   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3627   PetscFunctionReturn(0);
3628 }
3629 
3630 /*@
3631      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3632 
3633      Collective
3634 
3635    Input Parameters:
3636 +  comm - must be an MPI communicator of size 1
3637 .  bs - size of block
3638 .  m - number of rows
3639 .  n - number of columns
3640 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3641 .  j - column indices
3642 -  a - matrix values
3643 
3644    Output Parameter:
3645 .  mat - the matrix
3646 
3647    Level: advanced
3648 
3649    Notes:
3650        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3651     once the matrix is destroyed
3652 
3653        You cannot set new nonzero locations into this matrix, that will generate an error.
3654 
3655        The i and j indices are 0 based
3656 
3657        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3658 
3659       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3660       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3661       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3662       with column-major ordering within blocks.
3663 
3664 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3665 
3666 @*/
3667 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3668 {
3669   PetscErrorCode ierr;
3670   PetscInt       ii;
3671   Mat_SeqBAIJ    *baij;
3672 
3673   PetscFunctionBegin;
3674   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3675   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3676 
3677   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3678   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3679   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3680   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
3681   baij = (Mat_SeqBAIJ*)(*mat)->data;
3682   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3683   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3684 
3685   baij->i = i;
3686   baij->j = j;
3687   baij->a = a;
3688 
3689   baij->singlemalloc = PETSC_FALSE;
3690   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3691   baij->free_a       = PETSC_FALSE;
3692   baij->free_ij      = PETSC_FALSE;
3693 
3694   for (ii=0; ii<m; ii++) {
3695     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3696     if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3697   }
3698   if (PetscDefined(USE_DEBUG)) {
3699     for (ii=0; ii<baij->i[m]; ii++) {
3700       if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3701       if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3702     }
3703   }
3704 
3705   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3706   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3707   PetscFunctionReturn(0);
3708 }
3709 
3710 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3711 {
3712   PetscErrorCode ierr;
3713   PetscMPIInt    size;
3714 
3715   PetscFunctionBegin;
3716   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3717   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3718     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3719   } else {
3720     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3721   }
3722   PetscFunctionReturn(0);
3723 }
3724