xref: /petsc/src/mat/impls/baij/seq/baij.c (revision ebead697dbf761eb322f829370bbe90b3bd93fa3)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 
20 PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A,PetscInt type,PetscReal *reductions)
21 {
22   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) A->data;
23   PetscInt       m,n,i;
24   PetscInt       ib,jb,bs = A->rmap->bs;
25   MatScalar      *a_val = a_aij->a;
26 
27   PetscFunctionBegin;
28   PetscCall(MatGetSize(A,&m,&n));
29   for (i=0; i<n; i++) reductions[i] = 0.0;
30   if (type == NORM_2) {
31     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
32       for (jb=0; jb<bs; jb++) {
33         for (ib=0; ib<bs; ib++) {
34           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
35           a_val++;
36         }
37       }
38     }
39   } else if (type == NORM_1) {
40     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
41       for (jb=0; jb<bs; jb++) {
42         for (ib=0; ib<bs; ib++) {
43           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
44           a_val++;
45         }
46       }
47     }
48   } else if (type == NORM_INFINITY) {
49     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
50       for (jb=0; jb<bs; jb++) {
51         for (ib=0; ib<bs; ib++) {
52           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
53           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
54           a_val++;
55         }
56       }
57     }
58   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
59     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
60       for (jb=0; jb<bs; jb++) {
61         for (ib=0; ib<bs; ib++) {
62           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
63           a_val++;
64         }
65       }
66     }
67   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
68     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
69       for (jb=0; jb<bs; jb++) {
70         for (ib=0; ib<bs; ib++) {
71           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
72           a_val++;
73         }
74       }
75     }
76   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
77   if (type == NORM_2) {
78     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
79   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
80     for (i=0; i<n; i++) reductions[i] /= m;
81   }
82   PetscFunctionReturn(0);
83 }
84 
85 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
86 {
87   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
88   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
89   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
90   PetscReal      shift = 0.0;
91   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
92 
93   PetscFunctionBegin;
94   allowzeropivot = PetscNot(A->erroriffailure);
95 
96   if (a->idiagvalid) {
97     if (values) *values = a->idiag;
98     PetscFunctionReturn(0);
99   }
100   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
101   diag_offset = a->diag;
102   if (!a->idiag) {
103     PetscCall(PetscMalloc1(bs2*mbs,&a->idiag));
104     PetscCall(PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar)));
105   }
106   diag  = a->idiag;
107   if (values) *values = a->idiag;
108   /* factor and invert each block */
109   switch (bs) {
110   case 1:
111     for (i=0; i<mbs; i++) {
112       odiag    = v + 1*diag_offset[i];
113       diag[0]  = odiag[0];
114 
115       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
116         if (allowzeropivot) {
117           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
118           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
119           A->factorerror_zeropivot_row   = i;
120           PetscCall(PetscInfo(A,"Zero pivot, row %" PetscInt_FMT "\n",i));
121         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
122       }
123 
124       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
125       diag    += 1;
126     }
127     break;
128   case 2:
129     for (i=0; i<mbs; i++) {
130       odiag    = v + 4*diag_offset[i];
131       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
132       PetscCall(PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected));
133       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
134       diag    += 4;
135     }
136     break;
137   case 3:
138     for (i=0; i<mbs; i++) {
139       odiag    = v + 9*diag_offset[i];
140       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
141       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
142       diag[8]  = odiag[8];
143       PetscCall(PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected));
144       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
145       diag    += 9;
146     }
147     break;
148   case 4:
149     for (i=0; i<mbs; i++) {
150       odiag  = v + 16*diag_offset[i];
151       PetscCall(PetscArraycpy(diag,odiag,16));
152       PetscCall(PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected));
153       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
154       diag  += 16;
155     }
156     break;
157   case 5:
158     for (i=0; i<mbs; i++) {
159       odiag  = v + 25*diag_offset[i];
160       PetscCall(PetscArraycpy(diag,odiag,25));
161       PetscCall(PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected));
162       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
163       diag  += 25;
164     }
165     break;
166   case 6:
167     for (i=0; i<mbs; i++) {
168       odiag  = v + 36*diag_offset[i];
169       PetscCall(PetscArraycpy(diag,odiag,36));
170       PetscCall(PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected));
171       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
172       diag  += 36;
173     }
174     break;
175   case 7:
176     for (i=0; i<mbs; i++) {
177       odiag  = v + 49*diag_offset[i];
178       PetscCall(PetscArraycpy(diag,odiag,49));
179       PetscCall(PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected));
180       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
181       diag  += 49;
182     }
183     break;
184   default:
185     PetscCall(PetscMalloc2(bs,&v_work,bs,&v_pivots));
186     for (i=0; i<mbs; i++) {
187       odiag  = v + bs2*diag_offset[i];
188       PetscCall(PetscArraycpy(diag,odiag,bs2));
189       PetscCall(PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected));
190       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
191       diag  += bs2;
192     }
193     PetscCall(PetscFree2(v_work,v_pivots));
194   }
195   a->idiagvalid = PETSC_TRUE;
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
200 {
201   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
202   PetscScalar       *x,*work,*w,*workt,*t;
203   const MatScalar   *v,*aa = a->a, *idiag;
204   const PetscScalar *b,*xb;
205   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
206   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
207   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
208 
209   PetscFunctionBegin;
210   its = its*lits;
211   PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
212   PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
213   PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
214   PetscCheck(omega == 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor");
215   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
216 
217   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A,NULL));
218 
219   if (!m) PetscFunctionReturn(0);
220   diag  = a->diag;
221   idiag = a->idiag;
222   k    = PetscMax(A->rmap->n,A->cmap->n);
223   if (!a->mult_work) {
224     PetscCall(PetscMalloc1(k+1,&a->mult_work));
225   }
226   if (!a->sor_workt) {
227     PetscCall(PetscMalloc1(k,&a->sor_workt));
228   }
229   if (!a->sor_work) {
230     PetscCall(PetscMalloc1(bs,&a->sor_work));
231   }
232   work = a->mult_work;
233   t    = a->sor_workt;
234   w    = a->sor_work;
235 
236   PetscCall(VecGetArray(xx,&x));
237   PetscCall(VecGetArrayRead(bb,&b));
238 
239   if (flag & SOR_ZERO_INITIAL_GUESS) {
240     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
241       switch (bs) {
242       case 1:
243         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
244         t[0] = b[0];
245         i2     = 1;
246         idiag += 1;
247         for (i=1; i<m; i++) {
248           v  = aa + ai[i];
249           vi = aj + ai[i];
250           nz = diag[i] - ai[i];
251           s[0] = b[i2];
252           for (j=0; j<nz; j++) {
253             xw[0] = x[vi[j]];
254             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
255           }
256           t[i2] = s[0];
257           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
258           x[i2]  = xw[0];
259           idiag += 1;
260           i2    += 1;
261         }
262         break;
263       case 2:
264         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
265         t[0] = b[0]; t[1] = b[1];
266         i2     = 2;
267         idiag += 4;
268         for (i=1; i<m; i++) {
269           v  = aa + 4*ai[i];
270           vi = aj + ai[i];
271           nz = diag[i] - ai[i];
272           s[0] = b[i2]; s[1] = b[i2+1];
273           for (j=0; j<nz; j++) {
274             idx = 2*vi[j];
275             it  = 4*j;
276             xw[0] = x[idx]; xw[1] = x[1+idx];
277             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
278           }
279           t[i2] = s[0]; t[i2+1] = s[1];
280           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
281           x[i2]   = xw[0]; x[i2+1] = xw[1];
282           idiag  += 4;
283           i2     += 2;
284         }
285         break;
286       case 3:
287         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
288         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
289         i2     = 3;
290         idiag += 9;
291         for (i=1; i<m; i++) {
292           v  = aa + 9*ai[i];
293           vi = aj + ai[i];
294           nz = diag[i] - ai[i];
295           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
296           while (nz--) {
297             idx = 3*(*vi++);
298             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
299             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
300             v  += 9;
301           }
302           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
303           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
304           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
305           idiag  += 9;
306           i2     += 3;
307         }
308         break;
309       case 4:
310         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
311         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
312         i2     = 4;
313         idiag += 16;
314         for (i=1; i<m; i++) {
315           v  = aa + 16*ai[i];
316           vi = aj + ai[i];
317           nz = diag[i] - ai[i];
318           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
319           while (nz--) {
320             idx = 4*(*vi++);
321             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
322             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
323             v  += 16;
324           }
325           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
326           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
327           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
328           idiag  += 16;
329           i2     += 4;
330         }
331         break;
332       case 5:
333         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
334         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
335         i2     = 5;
336         idiag += 25;
337         for (i=1; i<m; i++) {
338           v  = aa + 25*ai[i];
339           vi = aj + ai[i];
340           nz = diag[i] - ai[i];
341           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
342           while (nz--) {
343             idx = 5*(*vi++);
344             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
345             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
346             v  += 25;
347           }
348           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
349           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
350           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
351           idiag  += 25;
352           i2     += 5;
353         }
354         break;
355       case 6:
356         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
357         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
358         i2     = 6;
359         idiag += 36;
360         for (i=1; i<m; i++) {
361           v  = aa + 36*ai[i];
362           vi = aj + ai[i];
363           nz = diag[i] - ai[i];
364           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
365           while (nz--) {
366             idx = 6*(*vi++);
367             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
368             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
369             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
370             v  += 36;
371           }
372           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
373           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
374           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
375           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
376           idiag  += 36;
377           i2     += 6;
378         }
379         break;
380       case 7:
381         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
382         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
383         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
384         i2     = 7;
385         idiag += 49;
386         for (i=1; i<m; i++) {
387           v  = aa + 49*ai[i];
388           vi = aj + ai[i];
389           nz = diag[i] - ai[i];
390           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
391           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
392           while (nz--) {
393             idx = 7*(*vi++);
394             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
395             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
396             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
397             v  += 49;
398           }
399           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
400           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
401           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
402           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
403           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
404           idiag  += 49;
405           i2     += 7;
406         }
407         break;
408       default:
409         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
410         PetscCall(PetscArraycpy(t,b,bs));
411         i2     = bs;
412         idiag += bs2;
413         for (i=1; i<m; i++) {
414           v  = aa + bs2*ai[i];
415           vi = aj + ai[i];
416           nz = diag[i] - ai[i];
417 
418           PetscCall(PetscArraycpy(w,b+i2,bs));
419           /* copy all rows of x that are needed into contiguous space */
420           workt = work;
421           for (j=0; j<nz; j++) {
422             PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs));
423             workt += bs;
424           }
425           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
426           PetscCall(PetscArraycpy(t+i2,w,bs));
427           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
428 
429           idiag += bs2;
430           i2    += bs;
431         }
432         break;
433       }
434       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
435       PetscCall(PetscLogFlops(1.0*bs2*a->nz));
436       xb = t;
437     }
438     else xb = b;
439     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
440       idiag = a->idiag+bs2*(a->mbs-1);
441       i2 = bs * (m-1);
442       switch (bs) {
443       case 1:
444         s[0]  = xb[i2];
445         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
446         x[i2] = xw[0];
447         i2   -= 1;
448         for (i=m-2; i>=0; i--) {
449           v  = aa + (diag[i]+1);
450           vi = aj + diag[i] + 1;
451           nz = ai[i+1] - diag[i] - 1;
452           s[0] = xb[i2];
453           for (j=0; j<nz; j++) {
454             xw[0] = x[vi[j]];
455             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
456           }
457           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
458           x[i2]  = xw[0];
459           idiag -= 1;
460           i2    -= 1;
461         }
462         break;
463       case 2:
464         s[0]  = xb[i2]; s[1] = xb[i2+1];
465         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
466         x[i2] = xw[0]; x[i2+1] = xw[1];
467         i2    -= 2;
468         idiag -= 4;
469         for (i=m-2; i>=0; i--) {
470           v  = aa + 4*(diag[i] + 1);
471           vi = aj + diag[i] + 1;
472           nz = ai[i+1] - diag[i] - 1;
473           s[0] = xb[i2]; s[1] = xb[i2+1];
474           for (j=0; j<nz; j++) {
475             idx = 2*vi[j];
476             it  = 4*j;
477             xw[0] = x[idx]; xw[1] = x[1+idx];
478             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
479           }
480           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
481           x[i2]   = xw[0]; x[i2+1] = xw[1];
482           idiag  -= 4;
483           i2     -= 2;
484         }
485         break;
486       case 3:
487         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
488         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
489         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
490         i2    -= 3;
491         idiag -= 9;
492         for (i=m-2; i>=0; i--) {
493           v  = aa + 9*(diag[i]+1);
494           vi = aj + diag[i] + 1;
495           nz = ai[i+1] - diag[i] - 1;
496           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
497           while (nz--) {
498             idx = 3*(*vi++);
499             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
500             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
501             v  += 9;
502           }
503           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
504           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
505           idiag  -= 9;
506           i2     -= 3;
507         }
508         break;
509       case 4:
510         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
511         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
512         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
513         i2    -= 4;
514         idiag -= 16;
515         for (i=m-2; i>=0; i--) {
516           v  = aa + 16*(diag[i]+1);
517           vi = aj + diag[i] + 1;
518           nz = ai[i+1] - diag[i] - 1;
519           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
520           while (nz--) {
521             idx = 4*(*vi++);
522             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
523             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
524             v  += 16;
525           }
526           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
527           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
528           idiag  -= 16;
529           i2     -= 4;
530         }
531         break;
532       case 5:
533         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
534         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
535         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
536         i2    -= 5;
537         idiag -= 25;
538         for (i=m-2; i>=0; i--) {
539           v  = aa + 25*(diag[i]+1);
540           vi = aj + diag[i] + 1;
541           nz = ai[i+1] - diag[i] - 1;
542           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
543           while (nz--) {
544             idx = 5*(*vi++);
545             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
546             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
547             v  += 25;
548           }
549           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
550           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
551           idiag  -= 25;
552           i2     -= 5;
553         }
554         break;
555       case 6:
556         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
557         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
558         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
559         i2    -= 6;
560         idiag -= 36;
561         for (i=m-2; i>=0; i--) {
562           v  = aa + 36*(diag[i]+1);
563           vi = aj + diag[i] + 1;
564           nz = ai[i+1] - diag[i] - 1;
565           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
566           while (nz--) {
567             idx = 6*(*vi++);
568             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
569             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
570             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
571             v  += 36;
572           }
573           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
574           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
575           idiag  -= 36;
576           i2     -= 6;
577         }
578         break;
579       case 7:
580         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
581         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
582         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
583         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
584         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
585         i2    -= 7;
586         idiag -= 49;
587         for (i=m-2; i>=0; i--) {
588           v  = aa + 49*(diag[i]+1);
589           vi = aj + diag[i] + 1;
590           nz = ai[i+1] - diag[i] - 1;
591           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
592           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
593           while (nz--) {
594             idx = 7*(*vi++);
595             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
596             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
597             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
598             v  += 49;
599           }
600           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
601           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
602           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
603           idiag  -= 49;
604           i2     -= 7;
605         }
606         break;
607       default:
608         PetscCall(PetscArraycpy(w,xb+i2,bs));
609         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
610         i2    -= bs;
611         idiag -= bs2;
612         for (i=m-2; i>=0; i--) {
613           v  = aa + bs2*(diag[i]+1);
614           vi = aj + diag[i] + 1;
615           nz = ai[i+1] - diag[i] - 1;
616 
617           PetscCall(PetscArraycpy(w,xb+i2,bs));
618           /* copy all rows of x that are needed into contiguous space */
619           workt = work;
620           for (j=0; j<nz; j++) {
621             PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs));
622             workt += bs;
623           }
624           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
625           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
626 
627           idiag -= bs2;
628           i2    -= bs;
629         }
630         break;
631       }
632       PetscCall(PetscLogFlops(1.0*bs2*(a->nz)));
633     }
634     its--;
635   }
636   while (its--) {
637     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
638       idiag = a->idiag;
639       i2 = 0;
640       switch (bs) {
641       case 1:
642         for (i=0; i<m; i++) {
643           v  = aa + ai[i];
644           vi = aj + ai[i];
645           nz = ai[i+1] - ai[i];
646           s[0] = b[i2];
647           for (j=0; j<nz; j++) {
648             xw[0] = x[vi[j]];
649             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
650           }
651           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
652           x[i2] += xw[0];
653           idiag += 1;
654           i2    += 1;
655         }
656         break;
657       case 2:
658         for (i=0; i<m; i++) {
659           v  = aa + 4*ai[i];
660           vi = aj + ai[i];
661           nz = ai[i+1] - ai[i];
662           s[0] = b[i2]; s[1] = b[i2+1];
663           for (j=0; j<nz; j++) {
664             idx = 2*vi[j];
665             it  = 4*j;
666             xw[0] = x[idx]; xw[1] = x[1+idx];
667             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
668           }
669           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
670           x[i2]  += xw[0]; x[i2+1] += xw[1];
671           idiag  += 4;
672           i2     += 2;
673         }
674         break;
675       case 3:
676         for (i=0; i<m; i++) {
677           v  = aa + 9*ai[i];
678           vi = aj + ai[i];
679           nz = ai[i+1] - ai[i];
680           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
681           while (nz--) {
682             idx = 3*(*vi++);
683             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
684             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
685             v  += 9;
686           }
687           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
688           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
689           idiag  += 9;
690           i2     += 3;
691         }
692         break;
693       case 4:
694         for (i=0; i<m; i++) {
695           v  = aa + 16*ai[i];
696           vi = aj + ai[i];
697           nz = ai[i+1] - ai[i];
698           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
699           while (nz--) {
700             idx = 4*(*vi++);
701             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
702             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
703             v  += 16;
704           }
705           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
706           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
707           idiag  += 16;
708           i2     += 4;
709         }
710         break;
711       case 5:
712         for (i=0; i<m; i++) {
713           v  = aa + 25*ai[i];
714           vi = aj + ai[i];
715           nz = ai[i+1] - ai[i];
716           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
717           while (nz--) {
718             idx = 5*(*vi++);
719             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
720             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
721             v  += 25;
722           }
723           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
724           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
725           idiag  += 25;
726           i2     += 5;
727         }
728         break;
729       case 6:
730         for (i=0; i<m; i++) {
731           v  = aa + 36*ai[i];
732           vi = aj + ai[i];
733           nz = ai[i+1] - ai[i];
734           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
735           while (nz--) {
736             idx = 6*(*vi++);
737             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
738             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
739             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
740             v  += 36;
741           }
742           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
743           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
744           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
745           idiag  += 36;
746           i2     += 6;
747         }
748         break;
749       case 7:
750         for (i=0; i<m; i++) {
751           v  = aa + 49*ai[i];
752           vi = aj + ai[i];
753           nz = ai[i+1] - ai[i];
754           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
755           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
756           while (nz--) {
757             idx = 7*(*vi++);
758             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
759             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
760             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
761             v  += 49;
762           }
763           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
764           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
765           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
766           idiag  += 49;
767           i2     += 7;
768         }
769         break;
770       default:
771         for (i=0; i<m; i++) {
772           v  = aa + bs2*ai[i];
773           vi = aj + ai[i];
774           nz = ai[i+1] - ai[i];
775 
776           PetscCall(PetscArraycpy(w,b+i2,bs));
777           /* copy all rows of x that are needed into contiguous space */
778           workt = work;
779           for (j=0; j<nz; j++) {
780             PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs));
781             workt += bs;
782           }
783           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
784           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
785 
786           idiag += bs2;
787           i2    += bs;
788         }
789         break;
790       }
791       PetscCall(PetscLogFlops(2.0*bs2*a->nz));
792     }
793     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
794       idiag = a->idiag+bs2*(a->mbs-1);
795       i2 = bs * (m-1);
796       switch (bs) {
797       case 1:
798         for (i=m-1; i>=0; i--) {
799           v  = aa + ai[i];
800           vi = aj + ai[i];
801           nz = ai[i+1] - ai[i];
802           s[0] = b[i2];
803           for (j=0; j<nz; j++) {
804             xw[0] = x[vi[j]];
805             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
806           }
807           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
808           x[i2] += xw[0];
809           idiag -= 1;
810           i2    -= 1;
811         }
812         break;
813       case 2:
814         for (i=m-1; i>=0; i--) {
815           v  = aa + 4*ai[i];
816           vi = aj + ai[i];
817           nz = ai[i+1] - ai[i];
818           s[0] = b[i2]; s[1] = b[i2+1];
819           for (j=0; j<nz; j++) {
820             idx = 2*vi[j];
821             it  = 4*j;
822             xw[0] = x[idx]; xw[1] = x[1+idx];
823             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
824           }
825           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
826           x[i2]  += xw[0]; x[i2+1] += xw[1];
827           idiag  -= 4;
828           i2     -= 2;
829         }
830         break;
831       case 3:
832         for (i=m-1; i>=0; i--) {
833           v  = aa + 9*ai[i];
834           vi = aj + ai[i];
835           nz = ai[i+1] - ai[i];
836           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
837           while (nz--) {
838             idx = 3*(*vi++);
839             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
840             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
841             v  += 9;
842           }
843           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
844           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
845           idiag  -= 9;
846           i2     -= 3;
847         }
848         break;
849       case 4:
850         for (i=m-1; i>=0; i--) {
851           v  = aa + 16*ai[i];
852           vi = aj + ai[i];
853           nz = ai[i+1] - ai[i];
854           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
855           while (nz--) {
856             idx = 4*(*vi++);
857             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
858             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
859             v  += 16;
860           }
861           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
862           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
863           idiag  -= 16;
864           i2     -= 4;
865         }
866         break;
867       case 5:
868         for (i=m-1; i>=0; i--) {
869           v  = aa + 25*ai[i];
870           vi = aj + ai[i];
871           nz = ai[i+1] - ai[i];
872           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
873           while (nz--) {
874             idx = 5*(*vi++);
875             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
876             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
877             v  += 25;
878           }
879           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
880           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
881           idiag  -= 25;
882           i2     -= 5;
883         }
884         break;
885       case 6:
886         for (i=m-1; i>=0; i--) {
887           v  = aa + 36*ai[i];
888           vi = aj + ai[i];
889           nz = ai[i+1] - ai[i];
890           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
891           while (nz--) {
892             idx = 6*(*vi++);
893             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
894             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
895             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
896             v  += 36;
897           }
898           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
899           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
900           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
901           idiag  -= 36;
902           i2     -= 6;
903         }
904         break;
905       case 7:
906         for (i=m-1; i>=0; i--) {
907           v  = aa + 49*ai[i];
908           vi = aj + ai[i];
909           nz = ai[i+1] - ai[i];
910           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
911           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
912           while (nz--) {
913             idx = 7*(*vi++);
914             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
915             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
916             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
917             v  += 49;
918           }
919           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
920           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
921           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
922           idiag  -= 49;
923           i2     -= 7;
924         }
925         break;
926       default:
927         for (i=m-1; i>=0; i--) {
928           v  = aa + bs2*ai[i];
929           vi = aj + ai[i];
930           nz = ai[i+1] - ai[i];
931 
932           PetscCall(PetscArraycpy(w,b+i2,bs));
933           /* copy all rows of x that are needed into contiguous space */
934           workt = work;
935           for (j=0; j<nz; j++) {
936             PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs));
937             workt += bs;
938           }
939           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
940           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
941 
942           idiag -= bs2;
943           i2    -= bs;
944         }
945         break;
946       }
947       PetscCall(PetscLogFlops(2.0*bs2*(a->nz)));
948     }
949   }
950   PetscCall(VecRestoreArray(xx,&x));
951   PetscCall(VecRestoreArrayRead(bb,&b));
952   PetscFunctionReturn(0);
953 }
954 
955 /*
956     Special version for direct calls from Fortran (Used in PETSc-fun3d)
957 */
958 #if defined(PETSC_HAVE_FORTRAN_CAPS)
959 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
960 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
961 #define matsetvaluesblocked4_ matsetvaluesblocked4
962 #endif
963 
964 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
965 {
966   Mat               A  = *AA;
967   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
968   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
969   PetscInt          *ai    =a->i,*ailen=a->ilen;
970   PetscInt          *aj    =a->j,stepval,lastcol = -1;
971   const PetscScalar *value = v;
972   MatScalar         *ap,*aa = a->a,*bap;
973 
974   PetscFunctionBegin;
975   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
976   stepval = (n-1)*4;
977   for (k=0; k<m; k++) { /* loop over added rows */
978     row  = im[k];
979     rp   = aj + ai[row];
980     ap   = aa + 16*ai[row];
981     nrow = ailen[row];
982     low  = 0;
983     high = nrow;
984     for (l=0; l<n; l++) { /* loop over added columns */
985       col = in[l];
986       if (col <= lastcol)  low = 0;
987       else                high = nrow;
988       lastcol = col;
989       value   = v + k*(stepval+4 + l)*4;
990       while (high-low > 7) {
991         t = (low+high)/2;
992         if (rp[t] > col) high = t;
993         else             low  = t;
994       }
995       for (i=low; i<high; i++) {
996         if (rp[i] > col) break;
997         if (rp[i] == col) {
998           bap = ap +  16*i;
999           for (ii=0; ii<4; ii++,value+=stepval) {
1000             for (jj=ii; jj<16; jj+=4) {
1001               bap[jj] += *value++;
1002             }
1003           }
1004           goto noinsert2;
1005         }
1006       }
1007       N = nrow++ - 1;
1008       high++; /* added new column index thus must search to one higher than before */
1009       /* shift up all the later entries in this row */
1010       for (ii=N; ii>=i; ii--) {
1011         rp[ii+1] = rp[ii];
1012         PetscCallVoid(PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16));
1013       }
1014       if (N >= i) {
1015         PetscCallVoid(PetscArrayzero(ap+16*i,16));
1016       }
1017       rp[i] = col;
1018       bap   = ap +  16*i;
1019       for (ii=0; ii<4; ii++,value+=stepval) {
1020         for (jj=ii; jj<16; jj+=4) {
1021           bap[jj] = *value++;
1022         }
1023       }
1024       noinsert2:;
1025       low = i;
1026     }
1027     ailen[row] = nrow;
1028   }
1029   PetscFunctionReturnVoid();
1030 }
1031 
1032 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1033 #define matsetvalues4_ MATSETVALUES4
1034 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1035 #define matsetvalues4_ matsetvalues4
1036 #endif
1037 
1038 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1039 {
1040   Mat         A  = *AA;
1041   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1042   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
1043   PetscInt    *ai=a->i,*ailen=a->ilen;
1044   PetscInt    *aj=a->j,brow,bcol;
1045   PetscInt    ridx,cidx,lastcol = -1;
1046   MatScalar   *ap,value,*aa=a->a,*bap;
1047 
1048   PetscFunctionBegin;
1049   for (k=0; k<m; k++) { /* loop over added rows */
1050     row  = im[k]; brow = row/4;
1051     rp   = aj + ai[brow];
1052     ap   = aa + 16*ai[brow];
1053     nrow = ailen[brow];
1054     low  = 0;
1055     high = nrow;
1056     for (l=0; l<n; l++) { /* loop over added columns */
1057       col   = in[l]; bcol = col/4;
1058       ridx  = row % 4; cidx = col % 4;
1059       value = v[l + k*n];
1060       if (col <= lastcol)  low = 0;
1061       else                high = nrow;
1062       lastcol = col;
1063       while (high-low > 7) {
1064         t = (low+high)/2;
1065         if (rp[t] > bcol) high = t;
1066         else              low  = t;
1067       }
1068       for (i=low; i<high; i++) {
1069         if (rp[i] > bcol) break;
1070         if (rp[i] == bcol) {
1071           bap   = ap +  16*i + 4*cidx + ridx;
1072           *bap += value;
1073           goto noinsert1;
1074         }
1075       }
1076       N = nrow++ - 1;
1077       high++; /* added new column thus must search to one higher than before */
1078       /* shift up all the later entries in this row */
1079       PetscCallVoid(PetscArraymove(rp+i+1,rp+i,N-i+1));
1080       PetscCallVoid(PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1)));
1081       PetscCallVoid(PetscArrayzero(ap+16*i,16));
1082       rp[i]                    = bcol;
1083       ap[16*i + 4*cidx + ridx] = value;
1084 noinsert1:;
1085       low = i;
1086     }
1087     ailen[brow] = nrow;
1088   }
1089   PetscFunctionReturnVoid();
1090 }
1091 
1092 /*
1093      Checks for missing diagonals
1094 */
1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1096 {
1097   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1098   PetscInt       *diag,*ii = a->i,i;
1099 
1100   PetscFunctionBegin;
1101   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
1102   *missing = PETSC_FALSE;
1103   if (A->rmap->n > 0 && !ii) {
1104     *missing = PETSC_TRUE;
1105     if (d) *d = 0;
1106     PetscCall(PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"));
1107   } else {
1108     PetscInt n;
1109     n = PetscMin(a->mbs, a->nbs);
1110     diag = a->diag;
1111     for (i=0; i<n; i++) {
1112       if (diag[i] >= ii[i+1]) {
1113         *missing = PETSC_TRUE;
1114         if (d) *d = i;
1115         PetscCall(PetscInfo(A,"Matrix is missing block diagonal number %" PetscInt_FMT "\n",i));
1116         break;
1117       }
1118     }
1119   }
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1124 {
1125   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1126   PetscInt       i,j,m = a->mbs;
1127 
1128   PetscFunctionBegin;
1129   if (!a->diag) {
1130     PetscCall(PetscMalloc1(m,&a->diag));
1131     PetscCall(PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt)));
1132     a->free_diag = PETSC_TRUE;
1133   }
1134   for (i=0; i<m; i++) {
1135     a->diag[i] = a->i[i+1];
1136     for (j=a->i[i]; j<a->i[i+1]; j++) {
1137       if (a->j[j] == i) {
1138         a->diag[i] = j;
1139         break;
1140       }
1141     }
1142   }
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1147 {
1148   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1149   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1150   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1151 
1152   PetscFunctionBegin;
1153   *nn = n;
1154   if (!ia) PetscFunctionReturn(0);
1155   if (symmetric) {
1156     PetscCall(MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja));
1157     nz   = tia[n];
1158   } else {
1159     tia = a->i; tja = a->j;
1160   }
1161 
1162   if (!blockcompressed && bs > 1) {
1163     (*nn) *= bs;
1164     /* malloc & create the natural set of indices */
1165     PetscCall(PetscMalloc1((n+1)*bs,ia));
1166     if (n) {
1167       (*ia)[0] = oshift;
1168       for (j=1; j<bs; j++) {
1169         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1170       }
1171     }
1172 
1173     for (i=1; i<n; i++) {
1174       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1175       for (j=1; j<bs; j++) {
1176         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1177       }
1178     }
1179     if (n) {
1180       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1181     }
1182 
1183     if (inja) {
1184       PetscCall(PetscMalloc1(nz*bs*bs,ja));
1185       cnt = 0;
1186       for (i=0; i<n; i++) {
1187         for (j=0; j<bs; j++) {
1188           for (k=tia[i]; k<tia[i+1]; k++) {
1189             for (l=0; l<bs; l++) {
1190               (*ja)[cnt++] = bs*tja[k] + l;
1191             }
1192           }
1193         }
1194       }
1195     }
1196 
1197     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1198       PetscCall(PetscFree(tia));
1199       PetscCall(PetscFree(tja));
1200     }
1201   } else if (oshift == 1) {
1202     if (symmetric) {
1203       nz = tia[A->rmap->n/bs];
1204       /*  add 1 to i and j indices */
1205       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1206       *ia = tia;
1207       if (ja) {
1208         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1209         *ja = tja;
1210       }
1211     } else {
1212       nz = a->i[A->rmap->n/bs];
1213       /* malloc space and  add 1 to i and j indices */
1214       PetscCall(PetscMalloc1(A->rmap->n/bs+1,ia));
1215       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1216       if (ja) {
1217         PetscCall(PetscMalloc1(nz,ja));
1218         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1219       }
1220     }
1221   } else {
1222     *ia = tia;
1223     if (ja) *ja = tja;
1224   }
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1229 {
1230   PetscFunctionBegin;
1231   if (!ia) PetscFunctionReturn(0);
1232   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1233     PetscCall(PetscFree(*ia));
1234     if (ja) PetscCall(PetscFree(*ja));
1235   }
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1240 {
1241   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1242 
1243   PetscFunctionBegin;
1244 #if defined(PETSC_USE_LOG)
1245   PetscLogObjectState((PetscObject)A,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT,A->rmap->N,A->cmap->n,a->nz);
1246 #endif
1247   PetscCall(MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i));
1248   PetscCall(ISDestroy(&a->row));
1249   PetscCall(ISDestroy(&a->col));
1250   if (a->free_diag) PetscCall(PetscFree(a->diag));
1251   PetscCall(PetscFree(a->idiag));
1252   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax,a->ilen));
1253   PetscCall(PetscFree(a->solve_work));
1254   PetscCall(PetscFree(a->mult_work));
1255   PetscCall(PetscFree(a->sor_workt));
1256   PetscCall(PetscFree(a->sor_work));
1257   PetscCall(ISDestroy(&a->icol));
1258   PetscCall(PetscFree(a->saved_values));
1259   PetscCall(PetscFree2(a->compressedrow.i,a->compressedrow.rindex));
1260 
1261   PetscCall(MatDestroy(&a->sbaijMat));
1262   PetscCall(MatDestroy(&a->parent));
1263   PetscCall(PetscFree(A->data));
1264 
1265   PetscCall(PetscObjectChangeTypeName((PetscObject)A,NULL));
1266   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL));
1267   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL));
1268   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL));
1269   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL));
1270   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL));
1271   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL));
1272   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL));
1273   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL));
1274   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL));
1275   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL));
1276   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL));
1277 #if defined(PETSC_HAVE_HYPRE)
1278   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL));
1279 #endif
1280   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL));
1281   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1286 {
1287   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1288 
1289   PetscFunctionBegin;
1290   switch (op) {
1291   case MAT_ROW_ORIENTED:
1292     a->roworiented = flg;
1293     break;
1294   case MAT_KEEP_NONZERO_PATTERN:
1295     a->keepnonzeropattern = flg;
1296     break;
1297   case MAT_NEW_NONZERO_LOCATIONS:
1298     a->nonew = (flg ? 0 : 1);
1299     break;
1300   case MAT_NEW_NONZERO_LOCATION_ERR:
1301     a->nonew = (flg ? -1 : 0);
1302     break;
1303   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1304     a->nonew = (flg ? -2 : 0);
1305     break;
1306   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1307     a->nounused = (flg ? -1 : 0);
1308     break;
1309   case MAT_FORCE_DIAGONAL_ENTRIES:
1310   case MAT_IGNORE_OFF_PROC_ENTRIES:
1311   case MAT_USE_HASH_TABLE:
1312   case MAT_SORTED_FULL:
1313     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1314     break;
1315   case MAT_SPD:
1316   case MAT_SYMMETRIC:
1317   case MAT_STRUCTURALLY_SYMMETRIC:
1318   case MAT_HERMITIAN:
1319   case MAT_SYMMETRY_ETERNAL:
1320   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1321   case MAT_SUBMAT_SINGLEIS:
1322   case MAT_STRUCTURE_ONLY:
1323   case MAT_SPD_ETERNAL:
1324     /* if the diagonal matrix is square it inherits some of the properties above */
1325     break;
1326   default:
1327     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1328   }
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1333 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1334 {
1335   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1336   MatScalar      *aa_i;
1337   PetscScalar    *v_i;
1338 
1339   PetscFunctionBegin;
1340   bs  = A->rmap->bs;
1341   bs2 = bs*bs;
1342   PetscCheck(row >= 0 && row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range", row);
1343 
1344   bn  = row/bs;   /* Block number */
1345   bp  = row % bs; /* Block Position */
1346   M   = ai[bn+1] - ai[bn];
1347   *nz = bs*M;
1348 
1349   if (v) {
1350     *v = NULL;
1351     if (*nz) {
1352       PetscCall(PetscMalloc1(*nz,v));
1353       for (i=0; i<M; i++) { /* for each block in the block row */
1354         v_i  = *v + i*bs;
1355         aa_i = aa + bs2*(ai[bn] + i);
1356         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1357       }
1358     }
1359   }
1360 
1361   if (idx) {
1362     *idx = NULL;
1363     if (*nz) {
1364       PetscCall(PetscMalloc1(*nz,idx));
1365       for (i=0; i<M; i++) { /* for each block in the block row */
1366         idx_i = *idx + i*bs;
1367         itmp  = bs*aj[ai[bn] + i];
1368         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1369       }
1370     }
1371   }
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1376 {
1377   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1378 
1379   PetscFunctionBegin;
1380   PetscCall(MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a));
1381   PetscFunctionReturn(0);
1382 }
1383 
1384 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1385 {
1386   PetscFunctionBegin;
1387   if (nz)  *nz = 0;
1388   if (idx) PetscCall(PetscFree(*idx));
1389   if (v)   PetscCall(PetscFree(*v));
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1394 {
1395   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1396   Mat            C;
1397   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1398   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1399   MatScalar      *ata,*aa=a->a;
1400 
1401   PetscFunctionBegin;
1402   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A,*B));
1403   PetscCall(PetscCalloc1(1+nbs,&atfill));
1404   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1405     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1406 
1407     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&C));
1408     PetscCall(MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N));
1409     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
1410     PetscCall(MatSeqBAIJSetPreallocation(C,bs,0,atfill));
1411 
1412     at  = (Mat_SeqBAIJ*)C->data;
1413     ati = at->i;
1414     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1415   } else {
1416     C = *B;
1417     at = (Mat_SeqBAIJ*)C->data;
1418     ati = at->i;
1419   }
1420 
1421   atj = at->j;
1422   ata = at->a;
1423 
1424   /* Copy ati into atfill so we have locations of the next free space in atj */
1425   PetscCall(PetscArraycpy(atfill,ati,nbs));
1426 
1427   /* Walk through A row-wise and mark nonzero entries of A^T. */
1428   for (i=0; i<mbs; i++) {
1429     anzj = ai[i+1] - ai[i];
1430     for (j=0; j<anzj; j++) {
1431       atj[atfill[*aj]] = i;
1432       for (kr=0; kr<bs; kr++) {
1433         for (k=0; k<bs; k++) {
1434           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1435         }
1436       }
1437       atfill[*aj++] += 1;
1438     }
1439   }
1440   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
1441   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
1442 
1443   /* Clean up temporary space and complete requests. */
1444   PetscCall(PetscFree(atfill));
1445 
1446   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1447     PetscCall(MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
1448     *B = C;
1449   } else {
1450     PetscCall(MatHeaderMerge(A,&C));
1451   }
1452   PetscFunctionReturn(0);
1453 }
1454 
1455 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1456 {
1457   Mat            Btrans;
1458 
1459   PetscFunctionBegin;
1460   *f   = PETSC_FALSE;
1461   PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&Btrans));
1462   PetscCall(MatEqual_SeqBAIJ(B,Btrans,f));
1463   PetscCall(MatDestroy(&Btrans));
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1468 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
1469 {
1470   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)mat->data;
1471   PetscInt       header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1472   PetscInt       *rowlens,*colidxs;
1473   PetscScalar    *matvals;
1474 
1475   PetscFunctionBegin;
1476   PetscCall(PetscViewerSetUp(viewer));
1477 
1478   M  = mat->rmap->N;
1479   N  = mat->cmap->N;
1480   m  = mat->rmap->n;
1481   bs = mat->rmap->bs;
1482   nz = bs*bs*A->nz;
1483 
1484   /* write matrix header */
1485   header[0] = MAT_FILE_CLASSID;
1486   header[1] = M; header[2] = N; header[3] = nz;
1487   PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));
1488 
1489   /* store row lengths */
1490   PetscCall(PetscMalloc1(m,&rowlens));
1491   for (cnt=0, i=0; i<A->mbs; i++)
1492     for (j=0; j<bs; j++)
1493       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1494   PetscCall(PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT));
1495   PetscCall(PetscFree(rowlens));
1496 
1497   /* store column indices  */
1498   PetscCall(PetscMalloc1(nz,&colidxs));
1499   for (cnt=0, i=0; i<A->mbs; i++)
1500     for (k=0; k<bs; k++)
1501       for (j=A->i[i]; j<A->i[i+1]; j++)
1502         for (l=0; l<bs; l++)
1503           colidxs[cnt++] = bs*A->j[j] + l;
1504   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1505   PetscCall(PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT));
1506   PetscCall(PetscFree(colidxs));
1507 
1508   /* store nonzero values */
1509   PetscCall(PetscMalloc1(nz,&matvals));
1510   for (cnt=0, i=0; i<A->mbs; i++)
1511     for (k=0; k<bs; k++)
1512       for (j=A->i[i]; j<A->i[i+1]; j++)
1513         for (l=0; l<bs; l++)
1514           matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1515   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1516   PetscCall(PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR));
1517   PetscCall(PetscFree(matvals));
1518 
1519   /* write block size option to the viewer's .info file */
1520   PetscCall(MatView_Binary_BlockSizes(mat,viewer));
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1525 {
1526   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1527   PetscInt       i,bs = A->rmap->bs,k;
1528 
1529   PetscFunctionBegin;
1530   PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
1531   for (i=0; i<a->mbs; i++) {
1532     PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT "-%" PetscInt_FMT ":",i*bs,i*bs+bs-1));
1533     for (k=a->i[i]; k<a->i[i+1]; k++) {
1534       PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT "-%" PetscInt_FMT ") ",bs*a->j[k],bs*a->j[k]+bs-1));
1535     }
1536     PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
1537   }
1538   PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1543 {
1544   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1545   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1546   PetscViewerFormat format;
1547 
1548   PetscFunctionBegin;
1549   if (A->structure_only) {
1550     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A,viewer));
1551     PetscFunctionReturn(0);
1552   }
1553 
1554   PetscCall(PetscViewerGetFormat(viewer,&format));
1555   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1556     PetscCall(PetscViewerASCIIPrintf(viewer,"  block size is %" PetscInt_FMT "\n",bs));
1557   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1558     const char *matname;
1559     Mat        aij;
1560     PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij));
1561     PetscCall(PetscObjectGetName((PetscObject)A,&matname));
1562     PetscCall(PetscObjectSetName((PetscObject)aij,matname));
1563     PetscCall(MatView(aij,viewer));
1564     PetscCall(MatDestroy(&aij));
1565   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1566       PetscFunctionReturn(0);
1567   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1568     PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
1569     for (i=0; i<a->mbs; i++) {
1570       for (j=0; j<bs; j++) {
1571         PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j));
1572         for (k=a->i[i]; k<a->i[i+1]; k++) {
1573           for (l=0; l<bs; l++) {
1574 #if defined(PETSC_USE_COMPLEX)
1575             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1576               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %gi) ",bs*a->j[k]+l,
1577                                              (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1578             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1579               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %gi) ",bs*a->j[k]+l,
1580                                              (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1581             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1582               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j])));
1583             }
1584 #else
1585             if (a->a[bs2*k + l*bs + j] != 0.0) {
1586               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]));
1587             }
1588 #endif
1589           }
1590         }
1591         PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
1592       }
1593     }
1594     PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
1595   } else {
1596     PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
1597     for (i=0; i<a->mbs; i++) {
1598       for (j=0; j<bs; j++) {
1599         PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j));
1600         for (k=a->i[i]; k<a->i[i+1]; k++) {
1601           for (l=0; l<bs; l++) {
1602 #if defined(PETSC_USE_COMPLEX)
1603             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1604               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i) ",bs*a->j[k]+l,
1605                                              (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1606             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1607               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i) ",bs*a->j[k]+l,
1608                                              (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1609             } else {
1610               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j])));
1611             }
1612 #else
1613             PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]));
1614 #endif
1615           }
1616         }
1617         PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
1618       }
1619     }
1620     PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
1621   }
1622   PetscCall(PetscViewerFlush(viewer));
1623   PetscFunctionReturn(0);
1624 }
1625 
1626 #include <petscdraw.h>
1627 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1628 {
1629   Mat               A = (Mat) Aa;
1630   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1631   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1632   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1633   MatScalar         *aa;
1634   PetscViewer       viewer;
1635   PetscViewerFormat format;
1636 
1637   PetscFunctionBegin;
1638   PetscCall(PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer));
1639   PetscCall(PetscViewerGetFormat(viewer,&format));
1640   PetscCall(PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr));
1641 
1642   /* loop over matrix elements drawing boxes */
1643 
1644   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1645     PetscDrawCollectiveBegin(draw);
1646     /* Blue for negative, Cyan for zero and  Red for positive */
1647     color = PETSC_DRAW_BLUE;
1648     for (i=0,row=0; i<mbs; i++,row+=bs) {
1649       for (j=a->i[i]; j<a->i[i+1]; j++) {
1650         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1651         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1652         aa  = a->a + j*bs2;
1653         for (k=0; k<bs; k++) {
1654           for (l=0; l<bs; l++) {
1655             if (PetscRealPart(*aa++) >=  0.) continue;
1656             PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color));
1657           }
1658         }
1659       }
1660     }
1661     color = PETSC_DRAW_CYAN;
1662     for (i=0,row=0; i<mbs; i++,row+=bs) {
1663       for (j=a->i[i]; j<a->i[i+1]; j++) {
1664         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1665         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1666         aa  = a->a + j*bs2;
1667         for (k=0; k<bs; k++) {
1668           for (l=0; l<bs; l++) {
1669             if (PetscRealPart(*aa++) != 0.) continue;
1670             PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color));
1671           }
1672         }
1673       }
1674     }
1675     color = PETSC_DRAW_RED;
1676     for (i=0,row=0; i<mbs; i++,row+=bs) {
1677       for (j=a->i[i]; j<a->i[i+1]; j++) {
1678         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1679         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1680         aa  = a->a + j*bs2;
1681         for (k=0; k<bs; k++) {
1682           for (l=0; l<bs; l++) {
1683             if (PetscRealPart(*aa++) <= 0.) continue;
1684             PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color));
1685           }
1686         }
1687       }
1688     }
1689     PetscDrawCollectiveEnd(draw);
1690   } else {
1691     /* use contour shading to indicate magnitude of values */
1692     /* first determine max of all nonzero values */
1693     PetscReal      minv = 0.0, maxv = 0.0;
1694     PetscDraw      popup;
1695 
1696     for (i=0; i<a->nz*a->bs2; i++) {
1697       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1698     }
1699     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1700     PetscCall(PetscDrawGetPopup(draw,&popup));
1701     PetscCall(PetscDrawScalePopup(popup,0.0,maxv));
1702 
1703     PetscDrawCollectiveBegin(draw);
1704     for (i=0,row=0; i<mbs; i++,row+=bs) {
1705       for (j=a->i[i]; j<a->i[i+1]; j++) {
1706         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1707         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1708         aa  = a->a + j*bs2;
1709         for (k=0; k<bs; k++) {
1710           for (l=0; l<bs; l++) {
1711             MatScalar v = *aa++;
1712             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1713             PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color));
1714           }
1715         }
1716       }
1717     }
1718     PetscDrawCollectiveEnd(draw);
1719   }
1720   PetscFunctionReturn(0);
1721 }
1722 
1723 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1724 {
1725   PetscReal      xl,yl,xr,yr,w,h;
1726   PetscDraw      draw;
1727   PetscBool      isnull;
1728 
1729   PetscFunctionBegin;
1730   PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
1731   PetscCall(PetscDrawIsNull(draw,&isnull));
1732   if (isnull) PetscFunctionReturn(0);
1733 
1734   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1735   xr  += w;          yr += h;        xl = -w;     yl = -h;
1736   PetscCall(PetscDrawSetCoordinates(draw,xl,yl,xr,yr));
1737   PetscCall(PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer));
1738   PetscCall(PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A));
1739   PetscCall(PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL));
1740   PetscCall(PetscDrawSave(draw));
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1745 {
1746   PetscBool      iascii,isbinary,isdraw;
1747 
1748   PetscFunctionBegin;
1749   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1750   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1751   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1752   if (iascii) {
1753     PetscCall(MatView_SeqBAIJ_ASCII(A,viewer));
1754   } else if (isbinary) {
1755     PetscCall(MatView_SeqBAIJ_Binary(A,viewer));
1756   } else if (isdraw) {
1757     PetscCall(MatView_SeqBAIJ_Draw(A,viewer));
1758   } else {
1759     Mat B;
1760     PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
1761     PetscCall(MatView(B,viewer));
1762     PetscCall(MatDestroy(&B));
1763   }
1764   PetscFunctionReturn(0);
1765 }
1766 
1767 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1768 {
1769   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1770   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1771   PetscInt    *ai = a->i,*ailen = a->ilen;
1772   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1773   MatScalar   *ap,*aa = a->a;
1774 
1775   PetscFunctionBegin;
1776   for (k=0; k<m; k++) { /* loop over rows */
1777     row = im[k]; brow = row/bs;
1778     if (row < 0) {v += n; continue;} /* negative row */
1779     PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " too large", row);
1780     rp   = aj ? aj + ai[brow] : NULL; /* mustn't add to NULL, that is UB */
1781     ap   = aa ? aa + bs2*ai[brow] : NULL; /* mustn't add to NULL, that is UB */
1782     nrow = ailen[brow];
1783     for (l=0; l<n; l++) { /* loop over columns */
1784       if (in[l] < 0) {v++; continue;} /* negative column */
1785       PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %" PetscInt_FMT " too large", in[l]);
1786       col  = in[l];
1787       bcol = col/bs;
1788       cidx = col%bs;
1789       ridx = row%bs;
1790       high = nrow;
1791       low  = 0; /* assume unsorted */
1792       while (high-low > 5) {
1793         t = (low+high)/2;
1794         if (rp[t] > bcol) high = t;
1795         else             low  = t;
1796       }
1797       for (i=low; i<high; i++) {
1798         if (rp[i] > bcol) break;
1799         if (rp[i] == bcol) {
1800           *v++ = ap[bs2*i+bs*cidx+ridx];
1801           goto finished;
1802         }
1803       }
1804       *v++ = 0.0;
1805 finished:;
1806     }
1807   }
1808   PetscFunctionReturn(0);
1809 }
1810 
1811 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1812 {
1813   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1814   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1815   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1816   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1817   PetscBool         roworiented=a->roworiented;
1818   const PetscScalar *value     = v;
1819   MatScalar         *ap=NULL,*aa = a->a,*bap;
1820 
1821   PetscFunctionBegin;
1822   if (roworiented) {
1823     stepval = (n-1)*bs;
1824   } else {
1825     stepval = (m-1)*bs;
1826   }
1827   for (k=0; k<m; k++) { /* loop over added rows */
1828     row = im[k];
1829     if (row < 0) continue;
1830     PetscCheck(row < a->mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT,row,a->mbs-1);
1831     rp   = aj + ai[row];
1832     if (!A->structure_only) ap = aa + bs2*ai[row];
1833     rmax = imax[row];
1834     nrow = ailen[row];
1835     low  = 0;
1836     high = nrow;
1837     for (l=0; l<n; l++) { /* loop over added columns */
1838       if (in[l] < 0) continue;
1839       PetscCheck(in[l] < a->nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT,in[l],a->nbs-1);
1840       col = in[l];
1841       if (!A->structure_only) {
1842         if (roworiented) {
1843           value = v + (k*(stepval+bs) + l)*bs;
1844         } else {
1845           value = v + (l*(stepval+bs) + k)*bs;
1846         }
1847       }
1848       if (col <= lastcol) low = 0;
1849       else high = nrow;
1850       lastcol = col;
1851       while (high-low > 7) {
1852         t = (low+high)/2;
1853         if (rp[t] > col) high = t;
1854         else             low  = t;
1855       }
1856       for (i=low; i<high; i++) {
1857         if (rp[i] > col) break;
1858         if (rp[i] == col) {
1859           if (A->structure_only) goto noinsert2;
1860           bap = ap +  bs2*i;
1861           if (roworiented) {
1862             if (is == ADD_VALUES) {
1863               for (ii=0; ii<bs; ii++,value+=stepval) {
1864                 for (jj=ii; jj<bs2; jj+=bs) {
1865                   bap[jj] += *value++;
1866                 }
1867               }
1868             } else {
1869               for (ii=0; ii<bs; ii++,value+=stepval) {
1870                 for (jj=ii; jj<bs2; jj+=bs) {
1871                   bap[jj] = *value++;
1872                 }
1873               }
1874             }
1875           } else {
1876             if (is == ADD_VALUES) {
1877               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1878                 for (jj=0; jj<bs; jj++) {
1879                   bap[jj] += value[jj];
1880                 }
1881                 bap += bs;
1882               }
1883             } else {
1884               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1885                 for (jj=0; jj<bs; jj++) {
1886                   bap[jj]  = value[jj];
1887                 }
1888                 bap += bs;
1889               }
1890             }
1891           }
1892           goto noinsert2;
1893         }
1894       }
1895       if (nonew == 1) goto noinsert2;
1896       PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
1897       if (A->structure_only) {
1898         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1899       } else {
1900         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1901       }
1902       N = nrow++ - 1; high++;
1903       /* shift up all the later entries in this row */
1904       PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
1905       rp[i] = col;
1906       if (!A->structure_only) {
1907         PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
1908         bap   = ap +  bs2*i;
1909         if (roworiented) {
1910           for (ii=0; ii<bs; ii++,value+=stepval) {
1911             for (jj=ii; jj<bs2; jj+=bs) {
1912               bap[jj] = *value++;
1913             }
1914           }
1915         } else {
1916           for (ii=0; ii<bs; ii++,value+=stepval) {
1917             for (jj=0; jj<bs; jj++) {
1918               *bap++ = *value++;
1919             }
1920           }
1921         }
1922       }
1923 noinsert2:;
1924       low = i;
1925     }
1926     ailen[row] = nrow;
1927   }
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1932 {
1933   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1934   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1935   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1936   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1937   MatScalar      *aa  = a->a,*ap;
1938   PetscReal      ratio=0.6;
1939 
1940   PetscFunctionBegin;
1941   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1942 
1943   if (m) rmax = ailen[0];
1944   for (i=1; i<mbs; i++) {
1945     /* move each row back by the amount of empty slots (fshift) before it*/
1946     fshift += imax[i-1] - ailen[i-1];
1947     rmax    = PetscMax(rmax,ailen[i]);
1948     if (fshift) {
1949       ip = aj + ai[i];
1950       ap = aa + bs2*ai[i];
1951       N  = ailen[i];
1952       PetscCall(PetscArraymove(ip-fshift,ip,N));
1953       if (!A->structure_only) {
1954         PetscCall(PetscArraymove(ap-bs2*fshift,ap,bs2*N));
1955       }
1956     }
1957     ai[i] = ai[i-1] + ailen[i-1];
1958   }
1959   if (mbs) {
1960     fshift += imax[mbs-1] - ailen[mbs-1];
1961     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1962   }
1963 
1964   /* reset ilen and imax for each row */
1965   a->nonzerorowcnt = 0;
1966   if (A->structure_only) {
1967     PetscCall(PetscFree2(a->imax,a->ilen));
1968   } else { /* !A->structure_only */
1969     for (i=0; i<mbs; i++) {
1970       ailen[i] = imax[i] = ai[i+1] - ai[i];
1971       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1972     }
1973   }
1974   a->nz = ai[mbs];
1975 
1976   /* diagonals may have moved, so kill the diagonal pointers */
1977   a->idiagvalid = PETSC_FALSE;
1978   if (fshift && a->diag) {
1979     PetscCall(PetscFree(a->diag));
1980     PetscCall(PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt)));
1981     a->diag = NULL;
1982   }
1983   if (fshift) PetscCheck(a->nounused != -1,PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1984   PetscCall(PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2));
1985   PetscCall(PetscInfo(A,"Number of mallocs during MatSetValues is %" PetscInt_FMT "\n",a->reallocs));
1986   PetscCall(PetscInfo(A,"Most nonzeros blocks in any row is %" PetscInt_FMT "\n",rmax));
1987 
1988   A->info.mallocs    += a->reallocs;
1989   a->reallocs         = 0;
1990   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1991   a->rmax             = rmax;
1992 
1993   if (!A->structure_only) {
1994     PetscCall(MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio));
1995   }
1996   PetscFunctionReturn(0);
1997 }
1998 
1999 /*
2000    This function returns an array of flags which indicate the locations of contiguous
2001    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2002    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2003    Assume: sizes should be long enough to hold all the values.
2004 */
2005 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2006 {
2007   PetscInt  i,j,k,row;
2008   PetscBool flg;
2009 
2010   PetscFunctionBegin;
2011   for (i=0,j=0; i<n; j++) {
2012     row = idx[i];
2013     if (row%bs!=0) { /* Not the beginning of a block */
2014       sizes[j] = 1;
2015       i++;
2016     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2017       sizes[j] = 1;         /* Also makes sure at least 'bs' values exist for next else */
2018       i++;
2019     } else { /* Beginning of the block, so check if the complete block exists */
2020       flg = PETSC_TRUE;
2021       for (k=1; k<bs; k++) {
2022         if (row+k != idx[i+k]) { /* break in the block */
2023           flg = PETSC_FALSE;
2024           break;
2025         }
2026       }
2027       if (flg) { /* No break in the bs */
2028         sizes[j] = bs;
2029         i       += bs;
2030       } else {
2031         sizes[j] = 1;
2032         i++;
2033       }
2034     }
2035   }
2036   *bs_max = j;
2037   PetscFunctionReturn(0);
2038 }
2039 
2040 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2041 {
2042   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2043   PetscInt          i,j,k,count,*rows;
2044   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2045   PetscScalar       zero = 0.0;
2046   MatScalar         *aa;
2047   const PetscScalar *xx;
2048   PetscScalar       *bb;
2049 
2050   PetscFunctionBegin;
2051   /* fix right hand side if needed */
2052   if (x && b) {
2053     PetscCall(VecGetArrayRead(x,&xx));
2054     PetscCall(VecGetArray(b,&bb));
2055     for (i=0; i<is_n; i++) {
2056       bb[is_idx[i]] = diag*xx[is_idx[i]];
2057     }
2058     PetscCall(VecRestoreArrayRead(x,&xx));
2059     PetscCall(VecRestoreArray(b,&bb));
2060   }
2061 
2062   /* Make a copy of the IS and  sort it */
2063   /* allocate memory for rows,sizes */
2064   PetscCall(PetscMalloc2(is_n,&rows,2*is_n,&sizes));
2065 
2066   /* copy IS values to rows, and sort them */
2067   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2068   PetscCall(PetscSortInt(is_n,rows));
2069 
2070   if (baij->keepnonzeropattern) {
2071     for (i=0; i<is_n; i++) sizes[i] = 1;
2072     bs_max          = is_n;
2073   } else {
2074     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max));
2075     A->nonzerostate++;
2076   }
2077 
2078   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2079     row = rows[j];
2080     PetscCheck(row >= 0 && row <= A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",row);
2081     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2082     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2083     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2084       if (diag != (PetscScalar)0.0) {
2085         if (baij->ilen[row/bs] > 0) {
2086           baij->ilen[row/bs]       = 1;
2087           baij->j[baij->i[row/bs]] = row/bs;
2088 
2089           PetscCall(PetscArrayzero(aa,count*bs));
2090         }
2091         /* Now insert all the diagonal values for this bs */
2092         for (k=0; k<bs; k++) {
2093           PetscCall((*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES));
2094         }
2095       } else { /* (diag == 0.0) */
2096         baij->ilen[row/bs] = 0;
2097       } /* end (diag == 0.0) */
2098     } else { /* (sizes[i] != bs) */
2099       PetscAssert(sizes[i] == 1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2100       for (k=0; k<count; k++) {
2101         aa[0] =  zero;
2102         aa   += bs;
2103       }
2104       if (diag != (PetscScalar)0.0) {
2105         PetscCall((*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES));
2106       }
2107     }
2108   }
2109 
2110   PetscCall(PetscFree2(rows,sizes));
2111   PetscCall(MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY));
2112   PetscFunctionReturn(0);
2113 }
2114 
2115 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2116 {
2117   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2118   PetscInt          i,j,k,count;
2119   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2120   PetscScalar       zero = 0.0;
2121   MatScalar         *aa;
2122   const PetscScalar *xx;
2123   PetscScalar       *bb;
2124   PetscBool         *zeroed,vecs = PETSC_FALSE;
2125 
2126   PetscFunctionBegin;
2127   /* fix right hand side if needed */
2128   if (x && b) {
2129     PetscCall(VecGetArrayRead(x,&xx));
2130     PetscCall(VecGetArray(b,&bb));
2131     vecs = PETSC_TRUE;
2132   }
2133 
2134   /* zero the columns */
2135   PetscCall(PetscCalloc1(A->rmap->n,&zeroed));
2136   for (i=0; i<is_n; i++) {
2137     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",is_idx[i]);
2138     zeroed[is_idx[i]] = PETSC_TRUE;
2139   }
2140   for (i=0; i<A->rmap->N; i++) {
2141     if (!zeroed[i]) {
2142       row = i/bs;
2143       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2144         for (k=0; k<bs; k++) {
2145           col = bs*baij->j[j] + k;
2146           if (zeroed[col]) {
2147             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2148             if (vecs) bb[i] -= aa[0]*xx[col];
2149             aa[0] = 0.0;
2150           }
2151         }
2152       }
2153     } else if (vecs) bb[i] = diag*xx[i];
2154   }
2155   PetscCall(PetscFree(zeroed));
2156   if (vecs) {
2157     PetscCall(VecRestoreArrayRead(x,&xx));
2158     PetscCall(VecRestoreArray(b,&bb));
2159   }
2160 
2161   /* zero the rows */
2162   for (i=0; i<is_n; i++) {
2163     row   = is_idx[i];
2164     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2165     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2166     for (k=0; k<count; k++) {
2167       aa[0] =  zero;
2168       aa   += bs;
2169     }
2170     if (diag != (PetscScalar)0.0) {
2171       PetscCall((*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES));
2172     }
2173   }
2174   PetscCall(MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY));
2175   PetscFunctionReturn(0);
2176 }
2177 
2178 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2179 {
2180   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2181   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2182   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2183   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2184   PetscInt       ridx,cidx,bs2=a->bs2;
2185   PetscBool      roworiented=a->roworiented;
2186   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2187 
2188   PetscFunctionBegin;
2189   for (k=0; k<m; k++) { /* loop over added rows */
2190     row  = im[k];
2191     brow = row/bs;
2192     if (row < 0) continue;
2193     PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->N-1);
2194     rp   = aj + ai[brow];
2195     if (!A->structure_only) ap = aa + bs2*ai[brow];
2196     rmax = imax[brow];
2197     nrow = ailen[brow];
2198     low  = 0;
2199     high = nrow;
2200     for (l=0; l<n; l++) { /* loop over added columns */
2201       if (in[l] < 0) continue;
2202       PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[l],A->cmap->n-1);
2203       col  = in[l]; bcol = col/bs;
2204       ridx = row % bs; cidx = col % bs;
2205       if (!A->structure_only) {
2206         if (roworiented) {
2207           value = v[l + k*n];
2208         } else {
2209           value = v[k + l*m];
2210         }
2211       }
2212       if (col <= lastcol) low = 0; else high = nrow;
2213       lastcol = col;
2214       while (high-low > 7) {
2215         t = (low+high)/2;
2216         if (rp[t] > bcol) high = t;
2217         else              low  = t;
2218       }
2219       for (i=low; i<high; i++) {
2220         if (rp[i] > bcol) break;
2221         if (rp[i] == bcol) {
2222           bap = ap +  bs2*i + bs*cidx + ridx;
2223           if (!A->structure_only) {
2224             if (is == ADD_VALUES) *bap += value;
2225             else                  *bap  = value;
2226           }
2227           goto noinsert1;
2228         }
2229       }
2230       if (nonew == 1) goto noinsert1;
2231       PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2232       if (A->structure_only) {
2233         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2234       } else {
2235         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2236       }
2237       N = nrow++ - 1; high++;
2238       /* shift up all the later entries in this row */
2239       PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
2240       rp[i] = bcol;
2241       if (!A->structure_only) {
2242         PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
2243         PetscCall(PetscArrayzero(ap+bs2*i,bs2));
2244         ap[bs2*i + bs*cidx + ridx] = value;
2245       }
2246       a->nz++;
2247       A->nonzerostate++;
2248 noinsert1:;
2249       low = i;
2250     }
2251     ailen[brow] = nrow;
2252   }
2253   PetscFunctionReturn(0);
2254 }
2255 
2256 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2257 {
2258   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2259   Mat            outA;
2260   PetscBool      row_identity,col_identity;
2261 
2262   PetscFunctionBegin;
2263   PetscCheck(info->levels == 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2264   PetscCall(ISIdentity(row,&row_identity));
2265   PetscCall(ISIdentity(col,&col_identity));
2266   PetscCheck(row_identity && col_identity,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2267 
2268   outA            = inA;
2269   inA->factortype = MAT_FACTOR_LU;
2270   PetscCall(PetscFree(inA->solvertype));
2271   PetscCall(PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype));
2272 
2273   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2274 
2275   PetscCall(PetscObjectReference((PetscObject)row));
2276   PetscCall(ISDestroy(&a->row));
2277   a->row = row;
2278   PetscCall(PetscObjectReference((PetscObject)col));
2279   PetscCall(ISDestroy(&a->col));
2280   a->col = col;
2281 
2282   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2283   PetscCall(ISDestroy(&a->icol));
2284   PetscCall(ISInvertPermutation(col,PETSC_DECIDE,&a->icol));
2285   PetscCall(PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol));
2286 
2287   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity)));
2288   if (!a->solve_work) {
2289     PetscCall(PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work));
2290     PetscCall(PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar)));
2291   }
2292   PetscCall(MatLUFactorNumeric(outA,inA,info));
2293   PetscFunctionReturn(0);
2294 }
2295 
2296 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2297 {
2298   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2299   PetscInt    i,nz,mbs;
2300 
2301   PetscFunctionBegin;
2302   nz  = baij->maxnz;
2303   mbs = baij->mbs;
2304   for (i=0; i<nz; i++) {
2305     baij->j[i] = indices[i];
2306   }
2307   baij->nz = nz;
2308   for (i=0; i<mbs; i++) {
2309     baij->ilen[i] = baij->imax[i];
2310   }
2311   PetscFunctionReturn(0);
2312 }
2313 
2314 /*@
2315     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2316        in the matrix.
2317 
2318   Input Parameters:
2319 +  mat - the SeqBAIJ matrix
2320 -  indices - the column indices
2321 
2322   Level: advanced
2323 
2324   Notes:
2325     This can be called if you have precomputed the nonzero structure of the
2326   matrix and want to provide it to the matrix object to improve the performance
2327   of the MatSetValues() operation.
2328 
2329     You MUST have set the correct numbers of nonzeros per row in the call to
2330   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2331 
2332     MUST be called before any calls to MatSetValues();
2333 
2334 @*/
2335 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2336 {
2337   PetscFunctionBegin;
2338   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2339   PetscValidIntPointer(indices,2);
2340   PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));
2341   PetscFunctionReturn(0);
2342 }
2343 
2344 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2345 {
2346   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2347   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2348   PetscReal      atmp;
2349   PetscScalar    *x,zero = 0.0;
2350   MatScalar      *aa;
2351   PetscInt       ncols,brow,krow,kcol;
2352 
2353   PetscFunctionBegin;
2354   /* why is this not a macro???????????????????????????????????????????????????????????????? */
2355   PetscCheck(!A->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2356   bs  = A->rmap->bs;
2357   aa  = a->a;
2358   ai  = a->i;
2359   aj  = a->j;
2360   mbs = a->mbs;
2361 
2362   PetscCall(VecSet(v,zero));
2363   PetscCall(VecGetArray(v,&x));
2364   PetscCall(VecGetLocalSize(v,&n));
2365   PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2366   for (i=0; i<mbs; i++) {
2367     ncols = ai[1] - ai[0]; ai++;
2368     brow  = bs*i;
2369     for (j=0; j<ncols; j++) {
2370       for (kcol=0; kcol<bs; kcol++) {
2371         for (krow=0; krow<bs; krow++) {
2372           atmp = PetscAbsScalar(*aa);aa++;
2373           row  = brow + krow;   /* row index */
2374           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2375         }
2376       }
2377       aj++;
2378     }
2379   }
2380   PetscCall(VecRestoreArray(v,&x));
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2385 {
2386   PetscFunctionBegin;
2387   /* If the two matrices have the same copy implementation, use fast copy. */
2388   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2389     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2390     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2391     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2392 
2393     PetscCheck(a->i[ambs] == b->i[bmbs],PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",a->i[ambs],b->i[bmbs]);
2394     PetscCheck(abs == bbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",abs,bbs);
2395     PetscCall(PetscArraycpy(b->a,a->a,bs2*a->i[ambs]));
2396     PetscCall(PetscObjectStateIncrease((PetscObject)B));
2397   } else {
2398     PetscCall(MatCopy_Basic(A,B,str));
2399   }
2400   PetscFunctionReturn(0);
2401 }
2402 
2403 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2404 {
2405   PetscFunctionBegin;
2406   PetscCall(MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL));
2407   PetscFunctionReturn(0);
2408 }
2409 
2410 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2411 {
2412   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2413 
2414   PetscFunctionBegin;
2415   *array = a->a;
2416   PetscFunctionReturn(0);
2417 }
2418 
2419 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2420 {
2421   PetscFunctionBegin;
2422   *array = NULL;
2423   PetscFunctionReturn(0);
2424 }
2425 
2426 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2427 {
2428   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2429   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2430   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2431 
2432   PetscFunctionBegin;
2433   /* Set the number of nonzeros in the new matrix */
2434   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz));
2435   PetscFunctionReturn(0);
2436 }
2437 
2438 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2439 {
2440   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2441   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2442   PetscBLASInt   one=1;
2443 
2444   PetscFunctionBegin;
2445   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2446     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2447     if (e) {
2448       PetscCall(PetscArraycmp(x->i,y->i,x->mbs+1,&e));
2449       if (e) {
2450         PetscCall(PetscArraycmp(x->j,y->j,x->i[x->mbs],&e));
2451         if (e) str = SAME_NONZERO_PATTERN;
2452       }
2453     }
2454     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"MatStructure is not SAME_NONZERO_PATTERN");
2455   }
2456   if (str == SAME_NONZERO_PATTERN) {
2457     PetscScalar  alpha = a;
2458     PetscBLASInt bnz;
2459     PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz));
2460     PetscCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2461     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2462   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2463     PetscCall(MatAXPY_Basic(Y,a,X,str));
2464   } else {
2465     Mat      B;
2466     PetscInt *nnz;
2467     PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2468     PetscCall(PetscMalloc1(Y->rmap->N,&nnz));
2469     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
2470     PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
2471     PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N));
2472     PetscCall(MatSetBlockSizesFromMats(B,Y,Y));
2473     PetscCall(MatSetType(B,(MatType) ((PetscObject)Y)->type_name));
2474     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz));
2475     PetscCall(MatSeqBAIJSetPreallocation(B,bs,0,nnz));
2476     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
2477     PetscCall(MatHeaderMerge(Y,&B));
2478     PetscCall(PetscFree(nnz));
2479   }
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2484 {
2485 #if defined(PETSC_USE_COMPLEX)
2486   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2487   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2488   MatScalar   *aa = a->a;
2489 
2490   PetscFunctionBegin;
2491   for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]);
2492 #else
2493   PetscFunctionBegin;
2494 #endif
2495   PetscFunctionReturn(0);
2496 }
2497 
2498 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2499 {
2500   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2501   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2502   MatScalar   *aa = a->a;
2503 
2504   PetscFunctionBegin;
2505   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2506   PetscFunctionReturn(0);
2507 }
2508 
2509 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2510 {
2511   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2512   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2513   MatScalar   *aa = a->a;
2514 
2515   PetscFunctionBegin;
2516   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 /*
2521     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2522 */
2523 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2524 {
2525   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2526   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2527   PetscInt       nz = a->i[m],row,*jj,mr,col;
2528 
2529   PetscFunctionBegin;
2530   *nn = n;
2531   if (!ia) PetscFunctionReturn(0);
2532   PetscCheck(!symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2533   PetscCall(PetscCalloc1(n,&collengths));
2534   PetscCall(PetscMalloc1(n+1,&cia));
2535   PetscCall(PetscMalloc1(nz,&cja));
2536   jj   = a->j;
2537   for (i=0; i<nz; i++) {
2538     collengths[jj[i]]++;
2539   }
2540   cia[0] = oshift;
2541   for (i=0; i<n; i++) {
2542     cia[i+1] = cia[i] + collengths[i];
2543   }
2544   PetscCall(PetscArrayzero(collengths,n));
2545   jj   = a->j;
2546   for (row=0; row<m; row++) {
2547     mr = a->i[row+1] - a->i[row];
2548     for (i=0; i<mr; i++) {
2549       col = *jj++;
2550 
2551       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2552     }
2553   }
2554   PetscCall(PetscFree(collengths));
2555   *ia  = cia; *ja = cja;
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2560 {
2561   PetscFunctionBegin;
2562   if (!ia) PetscFunctionReturn(0);
2563   PetscCall(PetscFree(*ia));
2564   PetscCall(PetscFree(*ja));
2565   PetscFunctionReturn(0);
2566 }
2567 
2568 /*
2569  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2570  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2571  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2572  */
2573 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2574 {
2575   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2576   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2577   PetscInt       nz = a->i[m],row,*jj,mr,col;
2578   PetscInt       *cspidx;
2579 
2580   PetscFunctionBegin;
2581   *nn = n;
2582   if (!ia) PetscFunctionReturn(0);
2583 
2584   PetscCall(PetscCalloc1(n,&collengths));
2585   PetscCall(PetscMalloc1(n+1,&cia));
2586   PetscCall(PetscMalloc1(nz,&cja));
2587   PetscCall(PetscMalloc1(nz,&cspidx));
2588   jj   = a->j;
2589   for (i=0; i<nz; i++) {
2590     collengths[jj[i]]++;
2591   }
2592   cia[0] = oshift;
2593   for (i=0; i<n; i++) {
2594     cia[i+1] = cia[i] + collengths[i];
2595   }
2596   PetscCall(PetscArrayzero(collengths,n));
2597   jj   = a->j;
2598   for (row=0; row<m; row++) {
2599     mr = a->i[row+1] - a->i[row];
2600     for (i=0; i<mr; i++) {
2601       col = *jj++;
2602       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2603       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2604     }
2605   }
2606   PetscCall(PetscFree(collengths));
2607   *ia    = cia;
2608   *ja    = cja;
2609   *spidx = cspidx;
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2614 {
2615   PetscFunctionBegin;
2616   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done));
2617   PetscCall(PetscFree(*spidx));
2618   PetscFunctionReturn(0);
2619 }
2620 
2621 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2622 {
2623   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2624 
2625   PetscFunctionBegin;
2626   if (!Y->preallocated || !aij->nz) {
2627     PetscCall(MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL));
2628   }
2629   PetscCall(MatShift_Basic(Y,a));
2630   PetscFunctionReturn(0);
2631 }
2632 
2633 /* -------------------------------------------------------------------*/
2634 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2635                                        MatGetRow_SeqBAIJ,
2636                                        MatRestoreRow_SeqBAIJ,
2637                                        MatMult_SeqBAIJ_N,
2638                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2639                                        MatMultTranspose_SeqBAIJ,
2640                                        MatMultTransposeAdd_SeqBAIJ,
2641                                        NULL,
2642                                        NULL,
2643                                        NULL,
2644                                /* 10*/ NULL,
2645                                        MatLUFactor_SeqBAIJ,
2646                                        NULL,
2647                                        NULL,
2648                                        MatTranspose_SeqBAIJ,
2649                                /* 15*/ MatGetInfo_SeqBAIJ,
2650                                        MatEqual_SeqBAIJ,
2651                                        MatGetDiagonal_SeqBAIJ,
2652                                        MatDiagonalScale_SeqBAIJ,
2653                                        MatNorm_SeqBAIJ,
2654                                /* 20*/ NULL,
2655                                        MatAssemblyEnd_SeqBAIJ,
2656                                        MatSetOption_SeqBAIJ,
2657                                        MatZeroEntries_SeqBAIJ,
2658                                /* 24*/ MatZeroRows_SeqBAIJ,
2659                                        NULL,
2660                                        NULL,
2661                                        NULL,
2662                                        NULL,
2663                                /* 29*/ MatSetUp_SeqBAIJ,
2664                                        NULL,
2665                                        NULL,
2666                                        NULL,
2667                                        NULL,
2668                                /* 34*/ MatDuplicate_SeqBAIJ,
2669                                        NULL,
2670                                        NULL,
2671                                        MatILUFactor_SeqBAIJ,
2672                                        NULL,
2673                                /* 39*/ MatAXPY_SeqBAIJ,
2674                                        MatCreateSubMatrices_SeqBAIJ,
2675                                        MatIncreaseOverlap_SeqBAIJ,
2676                                        MatGetValues_SeqBAIJ,
2677                                        MatCopy_SeqBAIJ,
2678                                /* 44*/ NULL,
2679                                        MatScale_SeqBAIJ,
2680                                        MatShift_SeqBAIJ,
2681                                        NULL,
2682                                        MatZeroRowsColumns_SeqBAIJ,
2683                                /* 49*/ NULL,
2684                                        MatGetRowIJ_SeqBAIJ,
2685                                        MatRestoreRowIJ_SeqBAIJ,
2686                                        MatGetColumnIJ_SeqBAIJ,
2687                                        MatRestoreColumnIJ_SeqBAIJ,
2688                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2689                                        NULL,
2690                                        NULL,
2691                                        NULL,
2692                                        MatSetValuesBlocked_SeqBAIJ,
2693                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2694                                        MatDestroy_SeqBAIJ,
2695                                        MatView_SeqBAIJ,
2696                                        NULL,
2697                                        NULL,
2698                                /* 64*/ NULL,
2699                                        NULL,
2700                                        NULL,
2701                                        NULL,
2702                                        NULL,
2703                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2704                                        NULL,
2705                                        MatConvert_Basic,
2706                                        NULL,
2707                                        NULL,
2708                                /* 74*/ NULL,
2709                                        MatFDColoringApply_BAIJ,
2710                                        NULL,
2711                                        NULL,
2712                                        NULL,
2713                                /* 79*/ NULL,
2714                                        NULL,
2715                                        NULL,
2716                                        NULL,
2717                                        MatLoad_SeqBAIJ,
2718                                /* 84*/ NULL,
2719                                        NULL,
2720                                        NULL,
2721                                        NULL,
2722                                        NULL,
2723                                /* 89*/ NULL,
2724                                        NULL,
2725                                        NULL,
2726                                        NULL,
2727                                        NULL,
2728                                /* 94*/ NULL,
2729                                        NULL,
2730                                        NULL,
2731                                        NULL,
2732                                        NULL,
2733                                /* 99*/ NULL,
2734                                        NULL,
2735                                        NULL,
2736                                        MatConjugate_SeqBAIJ,
2737                                        NULL,
2738                                /*104*/ NULL,
2739                                        MatRealPart_SeqBAIJ,
2740                                        MatImaginaryPart_SeqBAIJ,
2741                                        NULL,
2742                                        NULL,
2743                                /*109*/ NULL,
2744                                        NULL,
2745                                        NULL,
2746                                        NULL,
2747                                        MatMissingDiagonal_SeqBAIJ,
2748                                /*114*/ NULL,
2749                                        NULL,
2750                                        NULL,
2751                                        NULL,
2752                                        NULL,
2753                                /*119*/ NULL,
2754                                        NULL,
2755                                        MatMultHermitianTranspose_SeqBAIJ,
2756                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2757                                        NULL,
2758                                /*124*/ NULL,
2759                                        MatGetColumnReductions_SeqBAIJ,
2760                                        MatInvertBlockDiagonal_SeqBAIJ,
2761                                        NULL,
2762                                        NULL,
2763                                /*129*/ NULL,
2764                                        NULL,
2765                                        NULL,
2766                                        NULL,
2767                                        NULL,
2768                                /*134*/ NULL,
2769                                        NULL,
2770                                        NULL,
2771                                        NULL,
2772                                        NULL,
2773                                /*139*/ MatSetBlockSizes_Default,
2774                                        NULL,
2775                                        NULL,
2776                                        MatFDColoringSetUp_SeqXAIJ,
2777                                        NULL,
2778                                /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2779                                        MatDestroySubMatrices_SeqBAIJ,
2780                                        NULL,
2781                                        NULL,
2782                                        NULL,
2783                                        NULL,
2784                                /*150*/ NULL,
2785 };
2786 
2787 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2788 {
2789   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2790   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2791 
2792   PetscFunctionBegin;
2793   PetscCheck(aij->nonew == 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2794 
2795   /* allocate space for values if not already there */
2796   if (!aij->saved_values) {
2797     PetscCall(PetscMalloc1(nz+1,&aij->saved_values));
2798     PetscCall(PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar)));
2799   }
2800 
2801   /* copy values over */
2802   PetscCall(PetscArraycpy(aij->saved_values,aij->a,nz));
2803   PetscFunctionReturn(0);
2804 }
2805 
2806 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2807 {
2808   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2809   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2810 
2811   PetscFunctionBegin;
2812   PetscCheck(aij->nonew == 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2813   PetscCheck(aij->saved_values,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2814 
2815   /* copy values over */
2816   PetscCall(PetscArraycpy(aij->a,aij->saved_values,nz));
2817   PetscFunctionReturn(0);
2818 }
2819 
2820 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2821 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2822 
2823 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2824 {
2825   Mat_SeqBAIJ    *b;
2826   PetscInt       i,mbs,nbs,bs2;
2827   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2828 
2829   PetscFunctionBegin;
2830   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2831   if (nz == MAT_SKIP_ALLOCATION) {
2832     skipallocation = PETSC_TRUE;
2833     nz             = 0;
2834   }
2835 
2836   PetscCall(MatSetBlockSize(B,PetscAbs(bs)));
2837   PetscCall(PetscLayoutSetUp(B->rmap));
2838   PetscCall(PetscLayoutSetUp(B->cmap));
2839   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
2840 
2841   B->preallocated = PETSC_TRUE;
2842 
2843   mbs = B->rmap->n/bs;
2844   nbs = B->cmap->n/bs;
2845   bs2 = bs*bs;
2846 
2847   PetscCheck(mbs*bs==B->rmap->n && nbs*bs==B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->N,B->cmap->n,bs);
2848 
2849   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2850   PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %" PetscInt_FMT,nz);
2851   if (nnz) {
2852     for (i=0; i<mbs; i++) {
2853       PetscCheck(nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,nnz[i]);
2854       PetscCheck(nnz[i] <= nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,nnz[i],nbs);
2855     }
2856   }
2857 
2858   b    = (Mat_SeqBAIJ*)B->data;
2859   PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2860   PetscCall(PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL));
2861   PetscOptionsEnd();
2862 
2863   if (!flg) {
2864     switch (bs) {
2865     case 1:
2866       B->ops->mult    = MatMult_SeqBAIJ_1;
2867       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2868       break;
2869     case 2:
2870       B->ops->mult    = MatMult_SeqBAIJ_2;
2871       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2872       break;
2873     case 3:
2874       B->ops->mult    = MatMult_SeqBAIJ_3;
2875       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2876       break;
2877     case 4:
2878       B->ops->mult    = MatMult_SeqBAIJ_4;
2879       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2880       break;
2881     case 5:
2882       B->ops->mult    = MatMult_SeqBAIJ_5;
2883       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2884       break;
2885     case 6:
2886       B->ops->mult    = MatMult_SeqBAIJ_6;
2887       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2888       break;
2889     case 7:
2890       B->ops->mult    = MatMult_SeqBAIJ_7;
2891       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2892       break;
2893     case 9:
2894     {
2895       PetscInt version = 1;
2896       PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL));
2897       switch (version) {
2898 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2899       case 1:
2900         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2901         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2902         PetscCall(PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2903         break;
2904 #endif
2905       default:
2906         B->ops->mult    = MatMult_SeqBAIJ_N;
2907         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2908         PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2909         break;
2910       }
2911       break;
2912     }
2913     case 11:
2914       B->ops->mult    = MatMult_SeqBAIJ_11;
2915       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2916       break;
2917     case 12:
2918     {
2919       PetscInt version = 1;
2920       PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL));
2921       switch (version) {
2922       case 1:
2923         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
2924         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2925         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2926         break;
2927       case 2:
2928         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
2929         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
2930         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2931         break;
2932 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2933       case 3:
2934         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
2935         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2936         PetscCall(PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2937         break;
2938 #endif
2939       default:
2940         B->ops->mult    = MatMult_SeqBAIJ_N;
2941         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2942         PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2943         break;
2944       }
2945       break;
2946     }
2947     case 15:
2948     {
2949       PetscInt version = 1;
2950       PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL));
2951       switch (version) {
2952       case 1:
2953         B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2954         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2955         break;
2956       case 2:
2957         B->ops->mult    = MatMult_SeqBAIJ_15_ver2;
2958         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2959         break;
2960       case 3:
2961         B->ops->mult    = MatMult_SeqBAIJ_15_ver3;
2962         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2963         break;
2964       case 4:
2965         B->ops->mult    = MatMult_SeqBAIJ_15_ver4;
2966         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2967         break;
2968       default:
2969         B->ops->mult    = MatMult_SeqBAIJ_N;
2970         PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2971         break;
2972       }
2973       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2974       break;
2975     }
2976     default:
2977       B->ops->mult    = MatMult_SeqBAIJ_N;
2978       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2979       PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2980       break;
2981     }
2982   }
2983   B->ops->sor = MatSOR_SeqBAIJ;
2984   b->mbs = mbs;
2985   b->nbs = nbs;
2986   if (!skipallocation) {
2987     if (!b->imax) {
2988       PetscCall(PetscMalloc2(mbs,&b->imax,mbs,&b->ilen));
2989       PetscCall(PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt)));
2990 
2991       b->free_imax_ilen = PETSC_TRUE;
2992     }
2993     /* b->ilen will count nonzeros in each block row so far. */
2994     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2995     if (!nnz) {
2996       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2997       else if (nz < 0) nz = 1;
2998       nz = PetscMin(nz,nbs);
2999       for (i=0; i<mbs; i++) b->imax[i] = nz;
3000       PetscCall(PetscIntMultError(nz,mbs,&nz));
3001     } else {
3002       PetscInt64 nz64 = 0;
3003       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
3004       PetscCall(PetscIntCast(nz64,&nz));
3005     }
3006 
3007     /* allocate the matrix space */
3008     PetscCall(MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i));
3009     if (B->structure_only) {
3010       PetscCall(PetscMalloc1(nz,&b->j));
3011       PetscCall(PetscMalloc1(B->rmap->N+1,&b->i));
3012       PetscCall(PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt)));
3013     } else {
3014       PetscInt nzbs2 = 0;
3015       PetscCall(PetscIntMultError(nz,bs2,&nzbs2));
3016       PetscCall(PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i));
3017       PetscCall(PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))));
3018       PetscCall(PetscArrayzero(b->a,nz*bs2));
3019     }
3020     PetscCall(PetscArrayzero(b->j,nz));
3021 
3022     if (B->structure_only) {
3023       b->singlemalloc = PETSC_FALSE;
3024       b->free_a       = PETSC_FALSE;
3025     } else {
3026       b->singlemalloc = PETSC_TRUE;
3027       b->free_a       = PETSC_TRUE;
3028     }
3029     b->free_ij = PETSC_TRUE;
3030 
3031     b->i[0] = 0;
3032     for (i=1; i<mbs+1; i++) {
3033       b->i[i] = b->i[i-1] + b->imax[i-1];
3034     }
3035 
3036   } else {
3037     b->free_a  = PETSC_FALSE;
3038     b->free_ij = PETSC_FALSE;
3039   }
3040 
3041   b->bs2              = bs2;
3042   b->mbs              = mbs;
3043   b->nz               = 0;
3044   b->maxnz            = nz;
3045   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3046   B->was_assembled    = PETSC_FALSE;
3047   B->assembled        = PETSC_FALSE;
3048   if (realalloc) PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
3049   PetscFunctionReturn(0);
3050 }
3051 
3052 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3053 {
3054   PetscInt       i,m,nz,nz_max=0,*nnz;
3055   PetscScalar    *values=NULL;
3056   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
3057 
3058   PetscFunctionBegin;
3059   PetscCheck(bs >= 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs);
3060   PetscCall(PetscLayoutSetBlockSize(B->rmap,bs));
3061   PetscCall(PetscLayoutSetBlockSize(B->cmap,bs));
3062   PetscCall(PetscLayoutSetUp(B->rmap));
3063   PetscCall(PetscLayoutSetUp(B->cmap));
3064   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
3065   m    = B->rmap->n/bs;
3066 
3067   PetscCheck(ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]);
3068   PetscCall(PetscMalloc1(m+1, &nnz));
3069   for (i=0; i<m; i++) {
3070     nz = ii[i+1]- ii[i];
3071     PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz);
3072     nz_max = PetscMax(nz_max, nz);
3073     nnz[i] = nz;
3074   }
3075   PetscCall(MatSeqBAIJSetPreallocation(B,bs,0,nnz));
3076   PetscCall(PetscFree(nnz));
3077 
3078   values = (PetscScalar*)V;
3079   if (!values) {
3080     PetscCall(PetscCalloc1(bs*bs*(nz_max+1),&values));
3081   }
3082   for (i=0; i<m; i++) {
3083     PetscInt          ncols  = ii[i+1] - ii[i];
3084     const PetscInt    *icols = jj + ii[i];
3085     if (bs == 1 || !roworiented) {
3086       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3087       PetscCall(MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES));
3088     } else {
3089       PetscInt j;
3090       for (j=0; j<ncols; j++) {
3091         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3092         PetscCall(MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES));
3093       }
3094     }
3095   }
3096   if (!V) PetscCall(PetscFree(values));
3097   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3098   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3099   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3100   PetscFunctionReturn(0);
3101 }
3102 
3103 /*@C
3104    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
3105 
3106    Not Collective
3107 
3108    Input Parameter:
3109 .  mat - a MATSEQBAIJ matrix
3110 
3111    Output Parameter:
3112 .   array - pointer to the data
3113 
3114    Level: intermediate
3115 
3116 .seealso: `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3117 @*/
3118 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3119 {
3120   PetscFunctionBegin;
3121   PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));
3122   PetscFunctionReturn(0);
3123 }
3124 
3125 /*@C
3126    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3127 
3128    Not Collective
3129 
3130    Input Parameters:
3131 +  mat - a MATSEQBAIJ matrix
3132 -  array - pointer to the data
3133 
3134    Level: intermediate
3135 
3136 .seealso: `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3137 @*/
3138 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3139 {
3140   PetscFunctionBegin;
3141   PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));
3142   PetscFunctionReturn(0);
3143 }
3144 
3145 /*MC
3146    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3147    block sparse compressed row format.
3148 
3149    Options Database Keys:
3150 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3151 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3152 
3153    Level: beginner
3154 
3155    Notes:
3156     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3157     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
3158 
3159    Run with -info to see what version of the matrix-vector product is being used
3160 
3161 .seealso: `MatCreateSeqBAIJ()`
3162 M*/
3163 
3164 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3165 
3166 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3167 {
3168   PetscMPIInt    size;
3169   Mat_SeqBAIJ    *b;
3170 
3171   PetscFunctionBegin;
3172   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
3173   PetscCheck(size == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3174 
3175   PetscCall(PetscNewLog(B,&b));
3176   B->data = (void*)b;
3177   PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
3178 
3179   b->row          = NULL;
3180   b->col          = NULL;
3181   b->icol         = NULL;
3182   b->reallocs     = 0;
3183   b->saved_values = NULL;
3184 
3185   b->roworiented        = PETSC_TRUE;
3186   b->nonew              = 0;
3187   b->diag               = NULL;
3188   B->spptr              = NULL;
3189   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3190   b->keepnonzeropattern = PETSC_FALSE;
3191 
3192   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ));
3193   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ));
3194   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ));
3195   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ));
3196   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ));
3197   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ));
3198   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ));
3199   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ));
3200   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3201   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ));
3202 #if defined(PETSC_HAVE_HYPRE)
3203   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE));
3204 #endif
3205   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS));
3206   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ));
3207   PetscFunctionReturn(0);
3208 }
3209 
3210 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3211 {
3212   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3213   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3214 
3215   PetscFunctionBegin;
3216   PetscCheck(a->i[mbs] == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3217 
3218   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3219     c->imax           = a->imax;
3220     c->ilen           = a->ilen;
3221     c->free_imax_ilen = PETSC_FALSE;
3222   } else {
3223     PetscCall(PetscMalloc2(mbs,&c->imax,mbs,&c->ilen));
3224     PetscCall(PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt)));
3225     for (i=0; i<mbs; i++) {
3226       c->imax[i] = a->imax[i];
3227       c->ilen[i] = a->ilen[i];
3228     }
3229     c->free_imax_ilen = PETSC_TRUE;
3230   }
3231 
3232   /* allocate the matrix space */
3233   if (mallocmatspace) {
3234     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3235       PetscCall(PetscCalloc1(bs2*nz,&c->a));
3236       PetscCall(PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar)));
3237 
3238       c->i            = a->i;
3239       c->j            = a->j;
3240       c->singlemalloc = PETSC_FALSE;
3241       c->free_a       = PETSC_TRUE;
3242       c->free_ij      = PETSC_FALSE;
3243       c->parent       = A;
3244       C->preallocated = PETSC_TRUE;
3245       C->assembled    = PETSC_TRUE;
3246 
3247       PetscCall(PetscObjectReference((PetscObject)A));
3248       PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3249       PetscCall(MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3250     } else {
3251       PetscCall(PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i));
3252       PetscCall(PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt)));
3253 
3254       c->singlemalloc = PETSC_TRUE;
3255       c->free_a       = PETSC_TRUE;
3256       c->free_ij      = PETSC_TRUE;
3257 
3258       PetscCall(PetscArraycpy(c->i,a->i,mbs+1));
3259       if (mbs > 0) {
3260         PetscCall(PetscArraycpy(c->j,a->j,nz));
3261         if (cpvalues == MAT_COPY_VALUES) {
3262           PetscCall(PetscArraycpy(c->a,a->a,bs2*nz));
3263         } else {
3264           PetscCall(PetscArrayzero(c->a,bs2*nz));
3265         }
3266       }
3267       C->preallocated = PETSC_TRUE;
3268       C->assembled    = PETSC_TRUE;
3269     }
3270   }
3271 
3272   c->roworiented = a->roworiented;
3273   c->nonew       = a->nonew;
3274 
3275   PetscCall(PetscLayoutReference(A->rmap,&C->rmap));
3276   PetscCall(PetscLayoutReference(A->cmap,&C->cmap));
3277 
3278   c->bs2         = a->bs2;
3279   c->mbs         = a->mbs;
3280   c->nbs         = a->nbs;
3281 
3282   if (a->diag) {
3283     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3284       c->diag      = a->diag;
3285       c->free_diag = PETSC_FALSE;
3286     } else {
3287       PetscCall(PetscMalloc1(mbs+1,&c->diag));
3288       PetscCall(PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt)));
3289       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3290       c->free_diag = PETSC_TRUE;
3291     }
3292   } else c->diag = NULL;
3293 
3294   c->nz         = a->nz;
3295   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3296   c->solve_work = NULL;
3297   c->mult_work  = NULL;
3298   c->sor_workt  = NULL;
3299   c->sor_work   = NULL;
3300 
3301   c->compressedrow.use   = a->compressedrow.use;
3302   c->compressedrow.nrows = a->compressedrow.nrows;
3303   if (a->compressedrow.use) {
3304     i    = a->compressedrow.nrows;
3305     PetscCall(PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex));
3306     PetscCall(PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt)));
3307     PetscCall(PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1));
3308     PetscCall(PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i));
3309   } else {
3310     c->compressedrow.use    = PETSC_FALSE;
3311     c->compressedrow.i      = NULL;
3312     c->compressedrow.rindex = NULL;
3313   }
3314   C->nonzerostate = A->nonzerostate;
3315 
3316   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist));
3317   PetscFunctionReturn(0);
3318 }
3319 
3320 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3321 {
3322   PetscFunctionBegin;
3323   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),B));
3324   PetscCall(MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n));
3325   PetscCall(MatSetType(*B,MATSEQBAIJ));
3326   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE));
3327   PetscFunctionReturn(0);
3328 }
3329 
3330 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3331 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3332 {
3333   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3334   PetscInt       *rowidxs,*colidxs;
3335   PetscScalar    *matvals;
3336 
3337   PetscFunctionBegin;
3338   PetscCall(PetscViewerSetUp(viewer));
3339 
3340   /* read matrix header */
3341   PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
3342   PetscCheck(header[0] == MAT_FILE_CLASSID,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3343   M = header[1]; N = header[2]; nz = header[3];
3344   PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
3345   PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
3346   PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ");
3347 
3348   /* set block sizes from the viewer's .info file */
3349   PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
3350   /* set local and global sizes if not set already */
3351   if (mat->rmap->n < 0) mat->rmap->n = M;
3352   if (mat->cmap->n < 0) mat->cmap->n = N;
3353   if (mat->rmap->N < 0) mat->rmap->N = M;
3354   if (mat->cmap->N < 0) mat->cmap->N = N;
3355   PetscCall(PetscLayoutSetUp(mat->rmap));
3356   PetscCall(PetscLayoutSetUp(mat->cmap));
3357 
3358   /* check if the matrix sizes are correct */
3359   PetscCall(MatGetSize(mat,&rows,&cols));
3360   PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
3361   PetscCall(MatGetBlockSize(mat,&bs));
3362   PetscCall(MatGetLocalSize(mat,&m,&n));
3363   mbs = m/bs; nbs = n/bs;
3364 
3365   /* read in row lengths, column indices and nonzero values */
3366   PetscCall(PetscMalloc1(m+1,&rowidxs));
3367   PetscCall(PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT));
3368   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3369   sum = rowidxs[m];
3370   PetscCheck(sum == nz,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
3371 
3372   /* read in column indices and nonzero values */
3373   PetscCall(PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals));
3374   PetscCall(PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT));
3375   PetscCall(PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR));
3376 
3377   { /* preallocate matrix storage */
3378     PetscBT   bt; /* helper bit set to count nonzeros */
3379     PetscInt  *nnz;
3380     PetscBool sbaij;
3381 
3382     PetscCall(PetscBTCreate(nbs,&bt));
3383     PetscCall(PetscCalloc1(mbs,&nnz));
3384     PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij));
3385     for (i=0; i<mbs; i++) {
3386       PetscCall(PetscBTMemzero(nbs,bt));
3387       for (k=0; k<bs; k++) {
3388         PetscInt row = bs*i + k;
3389         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3390           PetscInt col = colidxs[j];
3391           if (!sbaij || col >= row)
3392             if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3393         }
3394       }
3395     }
3396     PetscCall(PetscBTDestroy(&bt));
3397     PetscCall(MatSeqBAIJSetPreallocation(mat,bs,0,nnz));
3398     PetscCall(MatSeqSBAIJSetPreallocation(mat,bs,0,nnz));
3399     PetscCall(PetscFree(nnz));
3400   }
3401 
3402   /* store matrix values */
3403   for (i=0; i<m; i++) {
3404     PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3405     PetscCall((*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES));
3406   }
3407 
3408   PetscCall(PetscFree(rowidxs));
3409   PetscCall(PetscFree2(colidxs,matvals));
3410   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
3411   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
3412   PetscFunctionReturn(0);
3413 }
3414 
3415 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3416 {
3417   PetscBool isbinary;
3418 
3419   PetscFunctionBegin;
3420   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
3421   PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3422   PetscCall(MatLoad_SeqBAIJ_Binary(mat,viewer));
3423   PetscFunctionReturn(0);
3424 }
3425 
3426 /*@C
3427    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3428    compressed row) format.  For good matrix assembly performance the
3429    user should preallocate the matrix storage by setting the parameter nz
3430    (or the array nnz).  By setting these parameters accurately, performance
3431    during matrix assembly can be increased by more than a factor of 50.
3432 
3433    Collective
3434 
3435    Input Parameters:
3436 +  comm - MPI communicator, set to PETSC_COMM_SELF
3437 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3438           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3439 .  m - number of rows
3440 .  n - number of columns
3441 .  nz - number of nonzero blocks  per block row (same for all rows)
3442 -  nnz - array containing the number of nonzero blocks in the various block rows
3443          (possibly different for each block row) or NULL
3444 
3445    Output Parameter:
3446 .  A - the matrix
3447 
3448    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3449    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3450    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3451 
3452    Options Database Keys:
3453 +   -mat_no_unroll - uses code that does not unroll the loops in the
3454                      block calculations (much slower)
3455 -    -mat_block_size - size of the blocks to use
3456 
3457    Level: intermediate
3458 
3459    Notes:
3460    The number of rows and columns must be divisible by blocksize.
3461 
3462    If the nnz parameter is given then the nz parameter is ignored
3463 
3464    A nonzero block is any block that as 1 or more nonzeros in it
3465 
3466    The block AIJ format is fully compatible with standard Fortran 77
3467    storage.  That is, the stored row and column indices can begin at
3468    either one (as in Fortran) or zero.  See the users' manual for details.
3469 
3470    Specify the preallocated storage with either nz or nnz (not both).
3471    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3472    allocation.  See Users-Manual: ch_mat for details.
3473    matrices.
3474 
3475 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3476 @*/
3477 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3478 {
3479   PetscFunctionBegin;
3480   PetscCall(MatCreate(comm,A));
3481   PetscCall(MatSetSizes(*A,m,n,m,n));
3482   PetscCall(MatSetType(*A,MATSEQBAIJ));
3483   PetscCall(MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz));
3484   PetscFunctionReturn(0);
3485 }
3486 
3487 /*@C
3488    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3489    per row in the matrix. For good matrix assembly performance the
3490    user should preallocate the matrix storage by setting the parameter nz
3491    (or the array nnz).  By setting these parameters accurately, performance
3492    during matrix assembly can be increased by more than a factor of 50.
3493 
3494    Collective
3495 
3496    Input Parameters:
3497 +  B - the matrix
3498 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3499           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3500 .  nz - number of block nonzeros per block row (same for all rows)
3501 -  nnz - array containing the number of block nonzeros in the various block rows
3502          (possibly different for each block row) or NULL
3503 
3504    Options Database Keys:
3505 +   -mat_no_unroll - uses code that does not unroll the loops in the
3506                      block calculations (much slower)
3507 -   -mat_block_size - size of the blocks to use
3508 
3509    Level: intermediate
3510 
3511    Notes:
3512    If the nnz parameter is given then the nz parameter is ignored
3513 
3514    You can call MatGetInfo() to get information on how effective the preallocation was;
3515    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3516    You can also run with the option -info and look for messages with the string
3517    malloc in them to see if additional memory allocation was needed.
3518 
3519    The block AIJ format is fully compatible with standard Fortran 77
3520    storage.  That is, the stored row and column indices can begin at
3521    either one (as in Fortran) or zero.  See the users' manual for details.
3522 
3523    Specify the preallocated storage with either nz or nnz (not both).
3524    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3525    allocation.  See Users-Manual: ch_mat for details.
3526 
3527 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3528 @*/
3529 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3530 {
3531   PetscFunctionBegin;
3532   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3533   PetscValidType(B,1);
3534   PetscValidLogicalCollectiveInt(B,bs,2);
3535   PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3536   PetscFunctionReturn(0);
3537 }
3538 
3539 /*@C
3540    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3541 
3542    Collective
3543 
3544    Input Parameters:
3545 +  B - the matrix
3546 .  i - the indices into j for the start of each local row (starts with zero)
3547 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3548 -  v - optional values in the matrix
3549 
3550    Level: advanced
3551 
3552    Notes:
3553    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3554    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3555    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3556    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3557    block column and the second index is over columns within a block.
3558 
3559    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3560 
3561 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3562 @*/
3563 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3564 {
3565   PetscFunctionBegin;
3566   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3567   PetscValidType(B,1);
3568   PetscValidLogicalCollectiveInt(B,bs,2);
3569   PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3570   PetscFunctionReturn(0);
3571 }
3572 
3573 /*@
3574      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3575 
3576      Collective
3577 
3578    Input Parameters:
3579 +  comm - must be an MPI communicator of size 1
3580 .  bs - size of block
3581 .  m - number of rows
3582 .  n - number of columns
3583 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3584 .  j - column indices
3585 -  a - matrix values
3586 
3587    Output Parameter:
3588 .  mat - the matrix
3589 
3590    Level: advanced
3591 
3592    Notes:
3593        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3594     once the matrix is destroyed
3595 
3596        You cannot set new nonzero locations into this matrix, that will generate an error.
3597 
3598        The i and j indices are 0 based
3599 
3600        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3601 
3602       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3603       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3604       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3605       with column-major ordering within blocks.
3606 
3607 .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3608 
3609 @*/
3610 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3611 {
3612   PetscInt       ii;
3613   Mat_SeqBAIJ    *baij;
3614 
3615   PetscFunctionBegin;
3616   PetscCheck(bs == 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %" PetscInt_FMT " > 1 is not supported yet",bs);
3617   if (m > 0) PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3618 
3619   PetscCall(MatCreate(comm,mat));
3620   PetscCall(MatSetSizes(*mat,m,n,m,n));
3621   PetscCall(MatSetType(*mat,MATSEQBAIJ));
3622   PetscCall(MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL));
3623   baij = (Mat_SeqBAIJ*)(*mat)->data;
3624   PetscCall(PetscMalloc2(m,&baij->imax,m,&baij->ilen));
3625   PetscCall(PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt)));
3626 
3627   baij->i = i;
3628   baij->j = j;
3629   baij->a = a;
3630 
3631   baij->singlemalloc = PETSC_FALSE;
3632   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3633   baij->free_a       = PETSC_FALSE;
3634   baij->free_ij      = PETSC_FALSE;
3635 
3636   for (ii=0; ii<m; ii++) {
3637     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3638     PetscCheck(i[ii+1] - i[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT,ii,i[ii+1] - i[ii]);
3639   }
3640   if (PetscDefined(USE_DEBUG)) {
3641     for (ii=0; ii<baij->i[m]; ii++) {
3642       PetscCheck(j[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]);
3643       PetscCheck(j[ii] <= n - 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]);
3644     }
3645   }
3646 
3647   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
3648   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
3649   PetscFunctionReturn(0);
3650 }
3651 
3652 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3653 {
3654   PetscFunctionBegin;
3655   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat));
3656   PetscFunctionReturn(0);
3657 }
3658