xref: /petsc/src/mat/impls/baij/seq/baij.c (revision fdf6c4e30aafdbc795e4f76379caa977fd5cdf5a)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 
20 PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A,PetscInt type,PetscReal *reductions)
21 {
22   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) A->data;
23   PetscInt       m,n,i;
24   PetscInt       ib,jb,bs = A->rmap->bs;
25   MatScalar      *a_val = a_aij->a;
26 
27   PetscFunctionBegin;
28   PetscCall(MatGetSize(A,&m,&n));
29   for (i=0; i<n; i++) reductions[i] = 0.0;
30   if (type == NORM_2) {
31     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
32       for (jb=0; jb<bs; jb++) {
33         for (ib=0; ib<bs; ib++) {
34           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
35           a_val++;
36         }
37       }
38     }
39   } else if (type == NORM_1) {
40     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
41       for (jb=0; jb<bs; jb++) {
42         for (ib=0; ib<bs; ib++) {
43           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
44           a_val++;
45         }
46       }
47     }
48   } else if (type == NORM_INFINITY) {
49     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
50       for (jb=0; jb<bs; jb++) {
51         for (ib=0; ib<bs; ib++) {
52           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
53           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
54           a_val++;
55         }
56       }
57     }
58   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
59     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
60       for (jb=0; jb<bs; jb++) {
61         for (ib=0; ib<bs; ib++) {
62           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
63           a_val++;
64         }
65       }
66     }
67   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
68     for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) {
69       for (jb=0; jb<bs; jb++) {
70         for (ib=0; ib<bs; ib++) {
71           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
72           a_val++;
73         }
74       }
75     }
76   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
77   if (type == NORM_2) {
78     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
79   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
80     for (i=0; i<n; i++) reductions[i] /= m;
81   }
82   PetscFunctionReturn(0);
83 }
84 
85 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
86 {
87   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
88   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
89   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
90   PetscReal      shift = 0.0;
91   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
92 
93   PetscFunctionBegin;
94   allowzeropivot = PetscNot(A->erroriffailure);
95 
96   if (a->idiagvalid) {
97     if (values) *values = a->idiag;
98     PetscFunctionReturn(0);
99   }
100   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
101   diag_offset = a->diag;
102   if (!a->idiag) {
103     PetscCall(PetscMalloc1(bs2*mbs,&a->idiag));
104     PetscCall(PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar)));
105   }
106   diag  = a->idiag;
107   if (values) *values = a->idiag;
108   /* factor and invert each block */
109   switch (bs) {
110   case 1:
111     for (i=0; i<mbs; i++) {
112       odiag    = v + 1*diag_offset[i];
113       diag[0]  = odiag[0];
114 
115       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
116         if (allowzeropivot) {
117           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
118           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
119           A->factorerror_zeropivot_row   = i;
120           PetscCall(PetscInfo(A,"Zero pivot, row %" PetscInt_FMT "\n",i));
121         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
122       }
123 
124       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
125       diag    += 1;
126     }
127     break;
128   case 2:
129     for (i=0; i<mbs; i++) {
130       odiag    = v + 4*diag_offset[i];
131       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
132       PetscCall(PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected));
133       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
134       diag    += 4;
135     }
136     break;
137   case 3:
138     for (i=0; i<mbs; i++) {
139       odiag    = v + 9*diag_offset[i];
140       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
141       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
142       diag[8]  = odiag[8];
143       PetscCall(PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected));
144       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
145       diag    += 9;
146     }
147     break;
148   case 4:
149     for (i=0; i<mbs; i++) {
150       odiag  = v + 16*diag_offset[i];
151       PetscCall(PetscArraycpy(diag,odiag,16));
152       PetscCall(PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected));
153       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
154       diag  += 16;
155     }
156     break;
157   case 5:
158     for (i=0; i<mbs; i++) {
159       odiag  = v + 25*diag_offset[i];
160       PetscCall(PetscArraycpy(diag,odiag,25));
161       PetscCall(PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected));
162       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
163       diag  += 25;
164     }
165     break;
166   case 6:
167     for (i=0; i<mbs; i++) {
168       odiag  = v + 36*diag_offset[i];
169       PetscCall(PetscArraycpy(diag,odiag,36));
170       PetscCall(PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected));
171       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
172       diag  += 36;
173     }
174     break;
175   case 7:
176     for (i=0; i<mbs; i++) {
177       odiag  = v + 49*diag_offset[i];
178       PetscCall(PetscArraycpy(diag,odiag,49));
179       PetscCall(PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected));
180       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
181       diag  += 49;
182     }
183     break;
184   default:
185     PetscCall(PetscMalloc2(bs,&v_work,bs,&v_pivots));
186     for (i=0; i<mbs; i++) {
187       odiag  = v + bs2*diag_offset[i];
188       PetscCall(PetscArraycpy(diag,odiag,bs2));
189       PetscCall(PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected));
190       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
191       diag  += bs2;
192     }
193     PetscCall(PetscFree2(v_work,v_pivots));
194   }
195   a->idiagvalid = PETSC_TRUE;
196   PetscFunctionReturn(0);
197 }
198 
199 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
200 {
201   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
202   PetscScalar       *x,*work,*w,*workt,*t;
203   const MatScalar   *v,*aa = a->a, *idiag;
204   const PetscScalar *b,*xb;
205   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
206   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
207   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
208 
209   PetscFunctionBegin;
210   its = its*lits;
211   PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
212   PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
213   PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
214   PetscCheck(omega == 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor");
215   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
216 
217   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A,NULL));
218 
219   if (!m) PetscFunctionReturn(0);
220   diag  = a->diag;
221   idiag = a->idiag;
222   k    = PetscMax(A->rmap->n,A->cmap->n);
223   if (!a->mult_work) {
224     PetscCall(PetscMalloc1(k+1,&a->mult_work));
225   }
226   if (!a->sor_workt) {
227     PetscCall(PetscMalloc1(k,&a->sor_workt));
228   }
229   if (!a->sor_work) {
230     PetscCall(PetscMalloc1(bs,&a->sor_work));
231   }
232   work = a->mult_work;
233   t    = a->sor_workt;
234   w    = a->sor_work;
235 
236   PetscCall(VecGetArray(xx,&x));
237   PetscCall(VecGetArrayRead(bb,&b));
238 
239   if (flag & SOR_ZERO_INITIAL_GUESS) {
240     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
241       switch (bs) {
242       case 1:
243         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
244         t[0] = b[0];
245         i2     = 1;
246         idiag += 1;
247         for (i=1; i<m; i++) {
248           v  = aa + ai[i];
249           vi = aj + ai[i];
250           nz = diag[i] - ai[i];
251           s[0] = b[i2];
252           for (j=0; j<nz; j++) {
253             xw[0] = x[vi[j]];
254             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
255           }
256           t[i2] = s[0];
257           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
258           x[i2]  = xw[0];
259           idiag += 1;
260           i2    += 1;
261         }
262         break;
263       case 2:
264         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
265         t[0] = b[0]; t[1] = b[1];
266         i2     = 2;
267         idiag += 4;
268         for (i=1; i<m; i++) {
269           v  = aa + 4*ai[i];
270           vi = aj + ai[i];
271           nz = diag[i] - ai[i];
272           s[0] = b[i2]; s[1] = b[i2+1];
273           for (j=0; j<nz; j++) {
274             idx = 2*vi[j];
275             it  = 4*j;
276             xw[0] = x[idx]; xw[1] = x[1+idx];
277             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
278           }
279           t[i2] = s[0]; t[i2+1] = s[1];
280           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
281           x[i2]   = xw[0]; x[i2+1] = xw[1];
282           idiag  += 4;
283           i2     += 2;
284         }
285         break;
286       case 3:
287         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
288         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
289         i2     = 3;
290         idiag += 9;
291         for (i=1; i<m; i++) {
292           v  = aa + 9*ai[i];
293           vi = aj + ai[i];
294           nz = diag[i] - ai[i];
295           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
296           while (nz--) {
297             idx = 3*(*vi++);
298             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
299             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
300             v  += 9;
301           }
302           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
303           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
304           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
305           idiag  += 9;
306           i2     += 3;
307         }
308         break;
309       case 4:
310         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
311         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
312         i2     = 4;
313         idiag += 16;
314         for (i=1; i<m; i++) {
315           v  = aa + 16*ai[i];
316           vi = aj + ai[i];
317           nz = diag[i] - ai[i];
318           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
319           while (nz--) {
320             idx = 4*(*vi++);
321             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
322             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
323             v  += 16;
324           }
325           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
326           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
327           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
328           idiag  += 16;
329           i2     += 4;
330         }
331         break;
332       case 5:
333         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
334         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
335         i2     = 5;
336         idiag += 25;
337         for (i=1; i<m; i++) {
338           v  = aa + 25*ai[i];
339           vi = aj + ai[i];
340           nz = diag[i] - ai[i];
341           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
342           while (nz--) {
343             idx = 5*(*vi++);
344             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
345             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
346             v  += 25;
347           }
348           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
349           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
350           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
351           idiag  += 25;
352           i2     += 5;
353         }
354         break;
355       case 6:
356         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
357         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
358         i2     = 6;
359         idiag += 36;
360         for (i=1; i<m; i++) {
361           v  = aa + 36*ai[i];
362           vi = aj + ai[i];
363           nz = diag[i] - ai[i];
364           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
365           while (nz--) {
366             idx = 6*(*vi++);
367             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
368             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
369             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
370             v  += 36;
371           }
372           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
373           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
374           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
375           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
376           idiag  += 36;
377           i2     += 6;
378         }
379         break;
380       case 7:
381         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
382         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
383         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
384         i2     = 7;
385         idiag += 49;
386         for (i=1; i<m; i++) {
387           v  = aa + 49*ai[i];
388           vi = aj + ai[i];
389           nz = diag[i] - ai[i];
390           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
391           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
392           while (nz--) {
393             idx = 7*(*vi++);
394             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
395             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
396             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
397             v  += 49;
398           }
399           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
400           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
401           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
402           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
403           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
404           idiag  += 49;
405           i2     += 7;
406         }
407         break;
408       default:
409         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
410         PetscCall(PetscArraycpy(t,b,bs));
411         i2     = bs;
412         idiag += bs2;
413         for (i=1; i<m; i++) {
414           v  = aa + bs2*ai[i];
415           vi = aj + ai[i];
416           nz = diag[i] - ai[i];
417 
418           PetscCall(PetscArraycpy(w,b+i2,bs));
419           /* copy all rows of x that are needed into contiguous space */
420           workt = work;
421           for (j=0; j<nz; j++) {
422             PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs));
423             workt += bs;
424           }
425           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
426           PetscCall(PetscArraycpy(t+i2,w,bs));
427           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
428 
429           idiag += bs2;
430           i2    += bs;
431         }
432         break;
433       }
434       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
435       PetscCall(PetscLogFlops(1.0*bs2*a->nz));
436       xb = t;
437     }
438     else xb = b;
439     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
440       idiag = a->idiag+bs2*(a->mbs-1);
441       i2 = bs * (m-1);
442       switch (bs) {
443       case 1:
444         s[0]  = xb[i2];
445         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
446         x[i2] = xw[0];
447         i2   -= 1;
448         for (i=m-2; i>=0; i--) {
449           v  = aa + (diag[i]+1);
450           vi = aj + diag[i] + 1;
451           nz = ai[i+1] - diag[i] - 1;
452           s[0] = xb[i2];
453           for (j=0; j<nz; j++) {
454             xw[0] = x[vi[j]];
455             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
456           }
457           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
458           x[i2]  = xw[0];
459           idiag -= 1;
460           i2    -= 1;
461         }
462         break;
463       case 2:
464         s[0]  = xb[i2]; s[1] = xb[i2+1];
465         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
466         x[i2] = xw[0]; x[i2+1] = xw[1];
467         i2    -= 2;
468         idiag -= 4;
469         for (i=m-2; i>=0; i--) {
470           v  = aa + 4*(diag[i] + 1);
471           vi = aj + diag[i] + 1;
472           nz = ai[i+1] - diag[i] - 1;
473           s[0] = xb[i2]; s[1] = xb[i2+1];
474           for (j=0; j<nz; j++) {
475             idx = 2*vi[j];
476             it  = 4*j;
477             xw[0] = x[idx]; xw[1] = x[1+idx];
478             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
479           }
480           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
481           x[i2]   = xw[0]; x[i2+1] = xw[1];
482           idiag  -= 4;
483           i2     -= 2;
484         }
485         break;
486       case 3:
487         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
488         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
489         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
490         i2    -= 3;
491         idiag -= 9;
492         for (i=m-2; i>=0; i--) {
493           v  = aa + 9*(diag[i]+1);
494           vi = aj + diag[i] + 1;
495           nz = ai[i+1] - diag[i] - 1;
496           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
497           while (nz--) {
498             idx = 3*(*vi++);
499             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
500             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
501             v  += 9;
502           }
503           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
504           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
505           idiag  -= 9;
506           i2     -= 3;
507         }
508         break;
509       case 4:
510         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
511         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
512         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
513         i2    -= 4;
514         idiag -= 16;
515         for (i=m-2; i>=0; i--) {
516           v  = aa + 16*(diag[i]+1);
517           vi = aj + diag[i] + 1;
518           nz = ai[i+1] - diag[i] - 1;
519           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
520           while (nz--) {
521             idx = 4*(*vi++);
522             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
523             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
524             v  += 16;
525           }
526           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
527           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
528           idiag  -= 16;
529           i2     -= 4;
530         }
531         break;
532       case 5:
533         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
534         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
535         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
536         i2    -= 5;
537         idiag -= 25;
538         for (i=m-2; i>=0; i--) {
539           v  = aa + 25*(diag[i]+1);
540           vi = aj + diag[i] + 1;
541           nz = ai[i+1] - diag[i] - 1;
542           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
543           while (nz--) {
544             idx = 5*(*vi++);
545             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
546             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
547             v  += 25;
548           }
549           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
550           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
551           idiag  -= 25;
552           i2     -= 5;
553         }
554         break;
555       case 6:
556         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
557         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
558         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
559         i2    -= 6;
560         idiag -= 36;
561         for (i=m-2; i>=0; i--) {
562           v  = aa + 36*(diag[i]+1);
563           vi = aj + diag[i] + 1;
564           nz = ai[i+1] - diag[i] - 1;
565           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
566           while (nz--) {
567             idx = 6*(*vi++);
568             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
569             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
570             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
571             v  += 36;
572           }
573           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
574           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
575           idiag  -= 36;
576           i2     -= 6;
577         }
578         break;
579       case 7:
580         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
581         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
582         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
583         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
584         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
585         i2    -= 7;
586         idiag -= 49;
587         for (i=m-2; i>=0; i--) {
588           v  = aa + 49*(diag[i]+1);
589           vi = aj + diag[i] + 1;
590           nz = ai[i+1] - diag[i] - 1;
591           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
592           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
593           while (nz--) {
594             idx = 7*(*vi++);
595             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
596             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
597             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
598             v  += 49;
599           }
600           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
601           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
602           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
603           idiag  -= 49;
604           i2     -= 7;
605         }
606         break;
607       default:
608         PetscCall(PetscArraycpy(w,xb+i2,bs));
609         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
610         i2    -= bs;
611         idiag -= bs2;
612         for (i=m-2; i>=0; i--) {
613           v  = aa + bs2*(diag[i]+1);
614           vi = aj + diag[i] + 1;
615           nz = ai[i+1] - diag[i] - 1;
616 
617           PetscCall(PetscArraycpy(w,xb+i2,bs));
618           /* copy all rows of x that are needed into contiguous space */
619           workt = work;
620           for (j=0; j<nz; j++) {
621             PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs));
622             workt += bs;
623           }
624           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
625           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
626 
627           idiag -= bs2;
628           i2    -= bs;
629         }
630         break;
631       }
632       PetscCall(PetscLogFlops(1.0*bs2*(a->nz)));
633     }
634     its--;
635   }
636   while (its--) {
637     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
638       idiag = a->idiag;
639       i2 = 0;
640       switch (bs) {
641       case 1:
642         for (i=0; i<m; i++) {
643           v  = aa + ai[i];
644           vi = aj + ai[i];
645           nz = ai[i+1] - ai[i];
646           s[0] = b[i2];
647           for (j=0; j<nz; j++) {
648             xw[0] = x[vi[j]];
649             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
650           }
651           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
652           x[i2] += xw[0];
653           idiag += 1;
654           i2    += 1;
655         }
656         break;
657       case 2:
658         for (i=0; i<m; i++) {
659           v  = aa + 4*ai[i];
660           vi = aj + ai[i];
661           nz = ai[i+1] - ai[i];
662           s[0] = b[i2]; s[1] = b[i2+1];
663           for (j=0; j<nz; j++) {
664             idx = 2*vi[j];
665             it  = 4*j;
666             xw[0] = x[idx]; xw[1] = x[1+idx];
667             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
668           }
669           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
670           x[i2]  += xw[0]; x[i2+1] += xw[1];
671           idiag  += 4;
672           i2     += 2;
673         }
674         break;
675       case 3:
676         for (i=0; i<m; i++) {
677           v  = aa + 9*ai[i];
678           vi = aj + ai[i];
679           nz = ai[i+1] - ai[i];
680           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
681           while (nz--) {
682             idx = 3*(*vi++);
683             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
684             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
685             v  += 9;
686           }
687           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
688           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
689           idiag  += 9;
690           i2     += 3;
691         }
692         break;
693       case 4:
694         for (i=0; i<m; i++) {
695           v  = aa + 16*ai[i];
696           vi = aj + ai[i];
697           nz = ai[i+1] - ai[i];
698           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
699           while (nz--) {
700             idx = 4*(*vi++);
701             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
702             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
703             v  += 16;
704           }
705           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
706           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
707           idiag  += 16;
708           i2     += 4;
709         }
710         break;
711       case 5:
712         for (i=0; i<m; i++) {
713           v  = aa + 25*ai[i];
714           vi = aj + ai[i];
715           nz = ai[i+1] - ai[i];
716           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
717           while (nz--) {
718             idx = 5*(*vi++);
719             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
720             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
721             v  += 25;
722           }
723           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
724           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
725           idiag  += 25;
726           i2     += 5;
727         }
728         break;
729       case 6:
730         for (i=0; i<m; i++) {
731           v  = aa + 36*ai[i];
732           vi = aj + ai[i];
733           nz = ai[i+1] - ai[i];
734           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
735           while (nz--) {
736             idx = 6*(*vi++);
737             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
738             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
739             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
740             v  += 36;
741           }
742           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
743           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
744           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
745           idiag  += 36;
746           i2     += 6;
747         }
748         break;
749       case 7:
750         for (i=0; i<m; i++) {
751           v  = aa + 49*ai[i];
752           vi = aj + ai[i];
753           nz = ai[i+1] - ai[i];
754           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
755           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
756           while (nz--) {
757             idx = 7*(*vi++);
758             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
759             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
760             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
761             v  += 49;
762           }
763           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
764           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
765           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
766           idiag  += 49;
767           i2     += 7;
768         }
769         break;
770       default:
771         for (i=0; i<m; i++) {
772           v  = aa + bs2*ai[i];
773           vi = aj + ai[i];
774           nz = ai[i+1] - ai[i];
775 
776           PetscCall(PetscArraycpy(w,b+i2,bs));
777           /* copy all rows of x that are needed into contiguous space */
778           workt = work;
779           for (j=0; j<nz; j++) {
780             PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs));
781             workt += bs;
782           }
783           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
784           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
785 
786           idiag += bs2;
787           i2    += bs;
788         }
789         break;
790       }
791       PetscCall(PetscLogFlops(2.0*bs2*a->nz));
792     }
793     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
794       idiag = a->idiag+bs2*(a->mbs-1);
795       i2 = bs * (m-1);
796       switch (bs) {
797       case 1:
798         for (i=m-1; i>=0; i--) {
799           v  = aa + ai[i];
800           vi = aj + ai[i];
801           nz = ai[i+1] - ai[i];
802           s[0] = b[i2];
803           for (j=0; j<nz; j++) {
804             xw[0] = x[vi[j]];
805             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
806           }
807           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
808           x[i2] += xw[0];
809           idiag -= 1;
810           i2    -= 1;
811         }
812         break;
813       case 2:
814         for (i=m-1; i>=0; i--) {
815           v  = aa + 4*ai[i];
816           vi = aj + ai[i];
817           nz = ai[i+1] - ai[i];
818           s[0] = b[i2]; s[1] = b[i2+1];
819           for (j=0; j<nz; j++) {
820             idx = 2*vi[j];
821             it  = 4*j;
822             xw[0] = x[idx]; xw[1] = x[1+idx];
823             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
824           }
825           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
826           x[i2]  += xw[0]; x[i2+1] += xw[1];
827           idiag  -= 4;
828           i2     -= 2;
829         }
830         break;
831       case 3:
832         for (i=m-1; i>=0; i--) {
833           v  = aa + 9*ai[i];
834           vi = aj + ai[i];
835           nz = ai[i+1] - ai[i];
836           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
837           while (nz--) {
838             idx = 3*(*vi++);
839             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
840             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
841             v  += 9;
842           }
843           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
844           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
845           idiag  -= 9;
846           i2     -= 3;
847         }
848         break;
849       case 4:
850         for (i=m-1; i>=0; i--) {
851           v  = aa + 16*ai[i];
852           vi = aj + ai[i];
853           nz = ai[i+1] - ai[i];
854           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
855           while (nz--) {
856             idx = 4*(*vi++);
857             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
858             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
859             v  += 16;
860           }
861           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
862           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
863           idiag  -= 16;
864           i2     -= 4;
865         }
866         break;
867       case 5:
868         for (i=m-1; i>=0; i--) {
869           v  = aa + 25*ai[i];
870           vi = aj + ai[i];
871           nz = ai[i+1] - ai[i];
872           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
873           while (nz--) {
874             idx = 5*(*vi++);
875             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
876             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
877             v  += 25;
878           }
879           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
880           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
881           idiag  -= 25;
882           i2     -= 5;
883         }
884         break;
885       case 6:
886         for (i=m-1; i>=0; i--) {
887           v  = aa + 36*ai[i];
888           vi = aj + ai[i];
889           nz = ai[i+1] - ai[i];
890           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
891           while (nz--) {
892             idx = 6*(*vi++);
893             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
894             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
895             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
896             v  += 36;
897           }
898           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
899           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
900           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
901           idiag  -= 36;
902           i2     -= 6;
903         }
904         break;
905       case 7:
906         for (i=m-1; i>=0; i--) {
907           v  = aa + 49*ai[i];
908           vi = aj + ai[i];
909           nz = ai[i+1] - ai[i];
910           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
911           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
912           while (nz--) {
913             idx = 7*(*vi++);
914             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
915             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
916             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
917             v  += 49;
918           }
919           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
920           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
921           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
922           idiag  -= 49;
923           i2     -= 7;
924         }
925         break;
926       default:
927         for (i=m-1; i>=0; i--) {
928           v  = aa + bs2*ai[i];
929           vi = aj + ai[i];
930           nz = ai[i+1] - ai[i];
931 
932           PetscCall(PetscArraycpy(w,b+i2,bs));
933           /* copy all rows of x that are needed into contiguous space */
934           workt = work;
935           for (j=0; j<nz; j++) {
936             PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs));
937             workt += bs;
938           }
939           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
940           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
941 
942           idiag -= bs2;
943           i2    -= bs;
944         }
945         break;
946       }
947       PetscCall(PetscLogFlops(2.0*bs2*(a->nz)));
948     }
949   }
950   PetscCall(VecRestoreArray(xx,&x));
951   PetscCall(VecRestoreArrayRead(bb,&b));
952   PetscFunctionReturn(0);
953 }
954 
955 /*
956     Special version for direct calls from Fortran (Used in PETSc-fun3d)
957 */
958 #if defined(PETSC_HAVE_FORTRAN_CAPS)
959 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
960 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
961 #define matsetvaluesblocked4_ matsetvaluesblocked4
962 #endif
963 
964 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
965 {
966   Mat               A  = *AA;
967   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
968   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
969   PetscInt          *ai    =a->i,*ailen=a->ilen;
970   PetscInt          *aj    =a->j,stepval,lastcol = -1;
971   const PetscScalar *value = v;
972   MatScalar         *ap,*aa = a->a,*bap;
973 
974   PetscFunctionBegin;
975   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
976   stepval = (n-1)*4;
977   for (k=0; k<m; k++) { /* loop over added rows */
978     row  = im[k];
979     rp   = aj + ai[row];
980     ap   = aa + 16*ai[row];
981     nrow = ailen[row];
982     low  = 0;
983     high = nrow;
984     for (l=0; l<n; l++) { /* loop over added columns */
985       col = in[l];
986       if (col <= lastcol)  low = 0;
987       else                high = nrow;
988       lastcol = col;
989       value   = v + k*(stepval+4 + l)*4;
990       while (high-low > 7) {
991         t = (low+high)/2;
992         if (rp[t] > col) high = t;
993         else             low  = t;
994       }
995       for (i=low; i<high; i++) {
996         if (rp[i] > col) break;
997         if (rp[i] == col) {
998           bap = ap +  16*i;
999           for (ii=0; ii<4; ii++,value+=stepval) {
1000             for (jj=ii; jj<16; jj+=4) {
1001               bap[jj] += *value++;
1002             }
1003           }
1004           goto noinsert2;
1005         }
1006       }
1007       N = nrow++ - 1;
1008       high++; /* added new column index thus must search to one higher than before */
1009       /* shift up all the later entries in this row */
1010       for (ii=N; ii>=i; ii--) {
1011         rp[ii+1] = rp[ii];
1012         PetscCallVoid(PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16));
1013       }
1014       if (N >= i) {
1015         PetscCallVoid(PetscArrayzero(ap+16*i,16));
1016       }
1017       rp[i] = col;
1018       bap   = ap +  16*i;
1019       for (ii=0; ii<4; ii++,value+=stepval) {
1020         for (jj=ii; jj<16; jj+=4) {
1021           bap[jj] = *value++;
1022         }
1023       }
1024       noinsert2:;
1025       low = i;
1026     }
1027     ailen[row] = nrow;
1028   }
1029   PetscFunctionReturnVoid();
1030 }
1031 
1032 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1033 #define matsetvalues4_ MATSETVALUES4
1034 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1035 #define matsetvalues4_ matsetvalues4
1036 #endif
1037 
1038 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1039 {
1040   Mat         A  = *AA;
1041   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1042   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
1043   PetscInt    *ai=a->i,*ailen=a->ilen;
1044   PetscInt    *aj=a->j,brow,bcol;
1045   PetscInt    ridx,cidx,lastcol = -1;
1046   MatScalar   *ap,value,*aa=a->a,*bap;
1047 
1048   PetscFunctionBegin;
1049   for (k=0; k<m; k++) { /* loop over added rows */
1050     row  = im[k]; brow = row/4;
1051     rp   = aj + ai[brow];
1052     ap   = aa + 16*ai[brow];
1053     nrow = ailen[brow];
1054     low  = 0;
1055     high = nrow;
1056     for (l=0; l<n; l++) { /* loop over added columns */
1057       col   = in[l]; bcol = col/4;
1058       ridx  = row % 4; cidx = col % 4;
1059       value = v[l + k*n];
1060       if (col <= lastcol)  low = 0;
1061       else                high = nrow;
1062       lastcol = col;
1063       while (high-low > 7) {
1064         t = (low+high)/2;
1065         if (rp[t] > bcol) high = t;
1066         else              low  = t;
1067       }
1068       for (i=low; i<high; i++) {
1069         if (rp[i] > bcol) break;
1070         if (rp[i] == bcol) {
1071           bap   = ap +  16*i + 4*cidx + ridx;
1072           *bap += value;
1073           goto noinsert1;
1074         }
1075       }
1076       N = nrow++ - 1;
1077       high++; /* added new column thus must search to one higher than before */
1078       /* shift up all the later entries in this row */
1079       PetscCallVoid(PetscArraymove(rp+i+1,rp+i,N-i+1));
1080       PetscCallVoid(PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1)));
1081       PetscCallVoid(PetscArrayzero(ap+16*i,16));
1082       rp[i]                    = bcol;
1083       ap[16*i + 4*cidx + ridx] = value;
1084 noinsert1:;
1085       low = i;
1086     }
1087     ailen[brow] = nrow;
1088   }
1089   PetscFunctionReturnVoid();
1090 }
1091 
1092 /*
1093      Checks for missing diagonals
1094 */
1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1096 {
1097   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1098   PetscInt       *diag,*ii = a->i,i;
1099 
1100   PetscFunctionBegin;
1101   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
1102   *missing = PETSC_FALSE;
1103   if (A->rmap->n > 0 && !ii) {
1104     *missing = PETSC_TRUE;
1105     if (d) *d = 0;
1106     PetscCall(PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"));
1107   } else {
1108     PetscInt n;
1109     n = PetscMin(a->mbs, a->nbs);
1110     diag = a->diag;
1111     for (i=0; i<n; i++) {
1112       if (diag[i] >= ii[i+1]) {
1113         *missing = PETSC_TRUE;
1114         if (d) *d = i;
1115         PetscCall(PetscInfo(A,"Matrix is missing block diagonal number %" PetscInt_FMT "\n",i));
1116         break;
1117       }
1118     }
1119   }
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1124 {
1125   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1126   PetscInt       i,j,m = a->mbs;
1127 
1128   PetscFunctionBegin;
1129   if (!a->diag) {
1130     PetscCall(PetscMalloc1(m,&a->diag));
1131     PetscCall(PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt)));
1132     a->free_diag = PETSC_TRUE;
1133   }
1134   for (i=0; i<m; i++) {
1135     a->diag[i] = a->i[i+1];
1136     for (j=a->i[i]; j<a->i[i+1]; j++) {
1137       if (a->j[j] == i) {
1138         a->diag[i] = j;
1139         break;
1140       }
1141     }
1142   }
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1147 {
1148   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1149   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1150   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1151 
1152   PetscFunctionBegin;
1153   *nn = n;
1154   if (!ia) PetscFunctionReturn(0);
1155   if (symmetric) {
1156     PetscCall(MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja));
1157     nz   = tia[n];
1158   } else {
1159     tia = a->i; tja = a->j;
1160   }
1161 
1162   if (!blockcompressed && bs > 1) {
1163     (*nn) *= bs;
1164     /* malloc & create the natural set of indices */
1165     PetscCall(PetscMalloc1((n+1)*bs,ia));
1166     if (n) {
1167       (*ia)[0] = oshift;
1168       for (j=1; j<bs; j++) {
1169         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1170       }
1171     }
1172 
1173     for (i=1; i<n; i++) {
1174       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1175       for (j=1; j<bs; j++) {
1176         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1177       }
1178     }
1179     if (n) {
1180       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1181     }
1182 
1183     if (inja) {
1184       PetscCall(PetscMalloc1(nz*bs*bs,ja));
1185       cnt = 0;
1186       for (i=0; i<n; i++) {
1187         for (j=0; j<bs; j++) {
1188           for (k=tia[i]; k<tia[i+1]; k++) {
1189             for (l=0; l<bs; l++) {
1190               (*ja)[cnt++] = bs*tja[k] + l;
1191             }
1192           }
1193         }
1194       }
1195     }
1196 
1197     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1198       PetscCall(PetscFree(tia));
1199       PetscCall(PetscFree(tja));
1200     }
1201   } else if (oshift == 1) {
1202     if (symmetric) {
1203       nz = tia[A->rmap->n/bs];
1204       /*  add 1 to i and j indices */
1205       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1206       *ia = tia;
1207       if (ja) {
1208         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1209         *ja = tja;
1210       }
1211     } else {
1212       nz = a->i[A->rmap->n/bs];
1213       /* malloc space and  add 1 to i and j indices */
1214       PetscCall(PetscMalloc1(A->rmap->n/bs+1,ia));
1215       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1216       if (ja) {
1217         PetscCall(PetscMalloc1(nz,ja));
1218         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1219       }
1220     }
1221   } else {
1222     *ia = tia;
1223     if (ja) *ja = tja;
1224   }
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1229 {
1230   PetscFunctionBegin;
1231   if (!ia) PetscFunctionReturn(0);
1232   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1233     PetscCall(PetscFree(*ia));
1234     if (ja) PetscCall(PetscFree(*ja));
1235   }
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1240 {
1241   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1242 
1243   PetscFunctionBegin;
1244 #if defined(PETSC_USE_LOG)
1245   PetscLogObjectState((PetscObject)A,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT,A->rmap->N,A->cmap->n,a->nz);
1246 #endif
1247   PetscCall(MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i));
1248   PetscCall(ISDestroy(&a->row));
1249   PetscCall(ISDestroy(&a->col));
1250   if (a->free_diag) PetscCall(PetscFree(a->diag));
1251   PetscCall(PetscFree(a->idiag));
1252   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax,a->ilen));
1253   PetscCall(PetscFree(a->solve_work));
1254   PetscCall(PetscFree(a->mult_work));
1255   PetscCall(PetscFree(a->sor_workt));
1256   PetscCall(PetscFree(a->sor_work));
1257   PetscCall(ISDestroy(&a->icol));
1258   PetscCall(PetscFree(a->saved_values));
1259   PetscCall(PetscFree2(a->compressedrow.i,a->compressedrow.rindex));
1260 
1261   PetscCall(MatDestroy(&a->sbaijMat));
1262   PetscCall(MatDestroy(&a->parent));
1263   PetscCall(PetscFree(A->data));
1264 
1265   PetscCall(PetscObjectChangeTypeName((PetscObject)A,NULL));
1266   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL));
1267   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL));
1268   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL));
1269   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL));
1270   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL));
1271   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL));
1272   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL));
1273   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL));
1274   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL));
1275   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL));
1276   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL));
1277 #if defined(PETSC_HAVE_HYPRE)
1278   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL));
1279 #endif
1280   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL));
1281   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1286 {
1287   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1288 
1289   PetscFunctionBegin;
1290   switch (op) {
1291   case MAT_ROW_ORIENTED:
1292     a->roworiented = flg;
1293     break;
1294   case MAT_KEEP_NONZERO_PATTERN:
1295     a->keepnonzeropattern = flg;
1296     break;
1297   case MAT_NEW_NONZERO_LOCATIONS:
1298     a->nonew = (flg ? 0 : 1);
1299     break;
1300   case MAT_NEW_NONZERO_LOCATION_ERR:
1301     a->nonew = (flg ? -1 : 0);
1302     break;
1303   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1304     a->nonew = (flg ? -2 : 0);
1305     break;
1306   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1307     a->nounused = (flg ? -1 : 0);
1308     break;
1309   case MAT_FORCE_DIAGONAL_ENTRIES:
1310   case MAT_IGNORE_OFF_PROC_ENTRIES:
1311   case MAT_USE_HASH_TABLE:
1312   case MAT_SORTED_FULL:
1313     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1314     break;
1315   case MAT_SPD:
1316   case MAT_SYMMETRIC:
1317   case MAT_STRUCTURALLY_SYMMETRIC:
1318   case MAT_HERMITIAN:
1319   case MAT_SYMMETRY_ETERNAL:
1320   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1321   case MAT_SUBMAT_SINGLEIS:
1322   case MAT_STRUCTURE_ONLY:
1323   case MAT_SPD_ETERNAL:
1324     /* if the diagonal matrix is square it inherits some of the properties above */
1325     break;
1326   default:
1327     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1328   }
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1333 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1334 {
1335   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1336   MatScalar      *aa_i;
1337   PetscScalar    *v_i;
1338 
1339   PetscFunctionBegin;
1340   bs  = A->rmap->bs;
1341   bs2 = bs*bs;
1342   PetscCheck(row >= 0 && row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range", row);
1343 
1344   bn  = row/bs;   /* Block number */
1345   bp  = row % bs; /* Block Position */
1346   M   = ai[bn+1] - ai[bn];
1347   *nz = bs*M;
1348 
1349   if (v) {
1350     *v = NULL;
1351     if (*nz) {
1352       PetscCall(PetscMalloc1(*nz,v));
1353       for (i=0; i<M; i++) { /* for each block in the block row */
1354         v_i  = *v + i*bs;
1355         aa_i = aa + bs2*(ai[bn] + i);
1356         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1357       }
1358     }
1359   }
1360 
1361   if (idx) {
1362     *idx = NULL;
1363     if (*nz) {
1364       PetscCall(PetscMalloc1(*nz,idx));
1365       for (i=0; i<M; i++) { /* for each block in the block row */
1366         idx_i = *idx + i*bs;
1367         itmp  = bs*aj[ai[bn] + i];
1368         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1369       }
1370     }
1371   }
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1376 {
1377   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1378 
1379   PetscFunctionBegin;
1380   PetscCall(MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a));
1381   PetscFunctionReturn(0);
1382 }
1383 
1384 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1385 {
1386   PetscFunctionBegin;
1387   if (nz)  *nz = 0;
1388   if (idx) PetscCall(PetscFree(*idx));
1389   if (v)   PetscCall(PetscFree(*v));
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1394 {
1395   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1396   Mat            C;
1397   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1398   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1399   MatScalar      *ata,*aa=a->a;
1400 
1401   PetscFunctionBegin;
1402   PetscCall(PetscCalloc1(1+nbs,&atfill));
1403   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1404     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1405 
1406     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&C));
1407     PetscCall(MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N));
1408     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
1409     PetscCall(MatSeqBAIJSetPreallocation(C,bs,0,atfill));
1410 
1411     at  = (Mat_SeqBAIJ*)C->data;
1412     ati = at->i;
1413     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1414   } else {
1415     C = *B;
1416     at = (Mat_SeqBAIJ*)C->data;
1417     ati = at->i;
1418   }
1419 
1420   atj = at->j;
1421   ata = at->a;
1422 
1423   /* Copy ati into atfill so we have locations of the next free space in atj */
1424   PetscCall(PetscArraycpy(atfill,ati,nbs));
1425 
1426   /* Walk through A row-wise and mark nonzero entries of A^T. */
1427   for (i=0; i<mbs; i++) {
1428     anzj = ai[i+1] - ai[i];
1429     for (j=0; j<anzj; j++) {
1430       atj[atfill[*aj]] = i;
1431       for (kr=0; kr<bs; kr++) {
1432         for (k=0; k<bs; k++) {
1433           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1434         }
1435       }
1436       atfill[*aj++] += 1;
1437     }
1438   }
1439   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
1440   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
1441 
1442   /* Clean up temporary space and complete requests. */
1443   PetscCall(PetscFree(atfill));
1444 
1445   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1446     PetscCall(MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
1447     *B = C;
1448   } else {
1449     PetscCall(MatHeaderMerge(A,&C));
1450   }
1451   PetscFunctionReturn(0);
1452 }
1453 
1454 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1455 {
1456   Mat            Btrans;
1457 
1458   PetscFunctionBegin;
1459   *f   = PETSC_FALSE;
1460   PetscCall(MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans));
1461   PetscCall(MatEqual_SeqBAIJ(B,Btrans,f));
1462   PetscCall(MatDestroy(&Btrans));
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1467 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
1468 {
1469   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)mat->data;
1470   PetscInt       header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1471   PetscInt       *rowlens,*colidxs;
1472   PetscScalar    *matvals;
1473 
1474   PetscFunctionBegin;
1475   PetscCall(PetscViewerSetUp(viewer));
1476 
1477   M  = mat->rmap->N;
1478   N  = mat->cmap->N;
1479   m  = mat->rmap->n;
1480   bs = mat->rmap->bs;
1481   nz = bs*bs*A->nz;
1482 
1483   /* write matrix header */
1484   header[0] = MAT_FILE_CLASSID;
1485   header[1] = M; header[2] = N; header[3] = nz;
1486   PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));
1487 
1488   /* store row lengths */
1489   PetscCall(PetscMalloc1(m,&rowlens));
1490   for (cnt=0, i=0; i<A->mbs; i++)
1491     for (j=0; j<bs; j++)
1492       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1493   PetscCall(PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT));
1494   PetscCall(PetscFree(rowlens));
1495 
1496   /* store column indices  */
1497   PetscCall(PetscMalloc1(nz,&colidxs));
1498   for (cnt=0, i=0; i<A->mbs; i++)
1499     for (k=0; k<bs; k++)
1500       for (j=A->i[i]; j<A->i[i+1]; j++)
1501         for (l=0; l<bs; l++)
1502           colidxs[cnt++] = bs*A->j[j] + l;
1503   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1504   PetscCall(PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT));
1505   PetscCall(PetscFree(colidxs));
1506 
1507   /* store nonzero values */
1508   PetscCall(PetscMalloc1(nz,&matvals));
1509   for (cnt=0, i=0; i<A->mbs; i++)
1510     for (k=0; k<bs; k++)
1511       for (j=A->i[i]; j<A->i[i+1]; j++)
1512         for (l=0; l<bs; l++)
1513           matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1514   PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
1515   PetscCall(PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR));
1516   PetscCall(PetscFree(matvals));
1517 
1518   /* write block size option to the viewer's .info file */
1519   PetscCall(MatView_Binary_BlockSizes(mat,viewer));
1520   PetscFunctionReturn(0);
1521 }
1522 
1523 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1524 {
1525   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1526   PetscInt       i,bs = A->rmap->bs,k;
1527 
1528   PetscFunctionBegin;
1529   PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
1530   for (i=0; i<a->mbs; i++) {
1531     PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT "-%" PetscInt_FMT ":",i*bs,i*bs+bs-1));
1532     for (k=a->i[i]; k<a->i[i+1]; k++) {
1533       PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT "-%" PetscInt_FMT ") ",bs*a->j[k],bs*a->j[k]+bs-1));
1534     }
1535     PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
1536   }
1537   PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1542 {
1543   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1544   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1545   PetscViewerFormat format;
1546 
1547   PetscFunctionBegin;
1548   if (A->structure_only) {
1549     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A,viewer));
1550     PetscFunctionReturn(0);
1551   }
1552 
1553   PetscCall(PetscViewerGetFormat(viewer,&format));
1554   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1555     PetscCall(PetscViewerASCIIPrintf(viewer,"  block size is %" PetscInt_FMT "\n",bs));
1556   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1557     const char *matname;
1558     Mat        aij;
1559     PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij));
1560     PetscCall(PetscObjectGetName((PetscObject)A,&matname));
1561     PetscCall(PetscObjectSetName((PetscObject)aij,matname));
1562     PetscCall(MatView(aij,viewer));
1563     PetscCall(MatDestroy(&aij));
1564   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1565       PetscFunctionReturn(0);
1566   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1567     PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
1568     for (i=0; i<a->mbs; i++) {
1569       for (j=0; j<bs; j++) {
1570         PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j));
1571         for (k=a->i[i]; k<a->i[i+1]; k++) {
1572           for (l=0; l<bs; l++) {
1573 #if defined(PETSC_USE_COMPLEX)
1574             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1575               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %gi) ",bs*a->j[k]+l,
1576                                              (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1577             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1578               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %gi) ",bs*a->j[k]+l,
1579                                              (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1580             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1581               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j])));
1582             }
1583 #else
1584             if (a->a[bs2*k + l*bs + j] != 0.0) {
1585               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]));
1586             }
1587 #endif
1588           }
1589         }
1590         PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
1591       }
1592     }
1593     PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
1594   } else {
1595     PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
1596     for (i=0; i<a->mbs; i++) {
1597       for (j=0; j<bs; j++) {
1598         PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j));
1599         for (k=a->i[i]; k<a->i[i+1]; k++) {
1600           for (l=0; l<bs; l++) {
1601 #if defined(PETSC_USE_COMPLEX)
1602             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1603               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i) ",bs*a->j[k]+l,
1604                                              (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1605             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1606               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i) ",bs*a->j[k]+l,
1607                                              (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j])));
1608             } else {
1609               PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j])));
1610             }
1611 #else
1612             PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]));
1613 #endif
1614           }
1615         }
1616         PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
1617       }
1618     }
1619     PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
1620   }
1621   PetscCall(PetscViewerFlush(viewer));
1622   PetscFunctionReturn(0);
1623 }
1624 
1625 #include <petscdraw.h>
1626 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1627 {
1628   Mat               A = (Mat) Aa;
1629   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1630   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1631   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1632   MatScalar         *aa;
1633   PetscViewer       viewer;
1634   PetscViewerFormat format;
1635 
1636   PetscFunctionBegin;
1637   PetscCall(PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer));
1638   PetscCall(PetscViewerGetFormat(viewer,&format));
1639   PetscCall(PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr));
1640 
1641   /* loop over matrix elements drawing boxes */
1642 
1643   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1644     PetscDrawCollectiveBegin(draw);
1645     /* Blue for negative, Cyan for zero and  Red for positive */
1646     color = PETSC_DRAW_BLUE;
1647     for (i=0,row=0; i<mbs; i++,row+=bs) {
1648       for (j=a->i[i]; j<a->i[i+1]; j++) {
1649         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1650         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1651         aa  = a->a + j*bs2;
1652         for (k=0; k<bs; k++) {
1653           for (l=0; l<bs; l++) {
1654             if (PetscRealPart(*aa++) >=  0.) continue;
1655             PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color));
1656           }
1657         }
1658       }
1659     }
1660     color = PETSC_DRAW_CYAN;
1661     for (i=0,row=0; i<mbs; i++,row+=bs) {
1662       for (j=a->i[i]; j<a->i[i+1]; j++) {
1663         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1664         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1665         aa  = a->a + j*bs2;
1666         for (k=0; k<bs; k++) {
1667           for (l=0; l<bs; l++) {
1668             if (PetscRealPart(*aa++) != 0.) continue;
1669             PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color));
1670           }
1671         }
1672       }
1673     }
1674     color = PETSC_DRAW_RED;
1675     for (i=0,row=0; i<mbs; i++,row+=bs) {
1676       for (j=a->i[i]; j<a->i[i+1]; j++) {
1677         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1678         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1679         aa  = a->a + j*bs2;
1680         for (k=0; k<bs; k++) {
1681           for (l=0; l<bs; l++) {
1682             if (PetscRealPart(*aa++) <= 0.) continue;
1683             PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color));
1684           }
1685         }
1686       }
1687     }
1688     PetscDrawCollectiveEnd(draw);
1689   } else {
1690     /* use contour shading to indicate magnitude of values */
1691     /* first determine max of all nonzero values */
1692     PetscReal      minv = 0.0, maxv = 0.0;
1693     PetscDraw      popup;
1694 
1695     for (i=0; i<a->nz*a->bs2; i++) {
1696       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1697     }
1698     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1699     PetscCall(PetscDrawGetPopup(draw,&popup));
1700     PetscCall(PetscDrawScalePopup(popup,0.0,maxv));
1701 
1702     PetscDrawCollectiveBegin(draw);
1703     for (i=0,row=0; i<mbs; i++,row+=bs) {
1704       for (j=a->i[i]; j<a->i[i+1]; j++) {
1705         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1706         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1707         aa  = a->a + j*bs2;
1708         for (k=0; k<bs; k++) {
1709           for (l=0; l<bs; l++) {
1710             MatScalar v = *aa++;
1711             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1712             PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color));
1713           }
1714         }
1715       }
1716     }
1717     PetscDrawCollectiveEnd(draw);
1718   }
1719   PetscFunctionReturn(0);
1720 }
1721 
1722 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1723 {
1724   PetscReal      xl,yl,xr,yr,w,h;
1725   PetscDraw      draw;
1726   PetscBool      isnull;
1727 
1728   PetscFunctionBegin;
1729   PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
1730   PetscCall(PetscDrawIsNull(draw,&isnull));
1731   if (isnull) PetscFunctionReturn(0);
1732 
1733   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1734   xr  += w;          yr += h;        xl = -w;     yl = -h;
1735   PetscCall(PetscDrawSetCoordinates(draw,xl,yl,xr,yr));
1736   PetscCall(PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer));
1737   PetscCall(PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A));
1738   PetscCall(PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL));
1739   PetscCall(PetscDrawSave(draw));
1740   PetscFunctionReturn(0);
1741 }
1742 
1743 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1744 {
1745   PetscBool      iascii,isbinary,isdraw;
1746 
1747   PetscFunctionBegin;
1748   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
1749   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
1750   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
1751   if (iascii) {
1752     PetscCall(MatView_SeqBAIJ_ASCII(A,viewer));
1753   } else if (isbinary) {
1754     PetscCall(MatView_SeqBAIJ_Binary(A,viewer));
1755   } else if (isdraw) {
1756     PetscCall(MatView_SeqBAIJ_Draw(A,viewer));
1757   } else {
1758     Mat B;
1759     PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
1760     PetscCall(MatView(B,viewer));
1761     PetscCall(MatDestroy(&B));
1762   }
1763   PetscFunctionReturn(0);
1764 }
1765 
1766 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1767 {
1768   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1769   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1770   PetscInt    *ai = a->i,*ailen = a->ilen;
1771   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1772   MatScalar   *ap,*aa = a->a;
1773 
1774   PetscFunctionBegin;
1775   for (k=0; k<m; k++) { /* loop over rows */
1776     row = im[k]; brow = row/bs;
1777     if (row < 0) {v += n; continue;} /* negative row */
1778     PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " too large", row);
1779     rp   = aj ? aj + ai[brow] : NULL; /* mustn't add to NULL, that is UB */
1780     ap   = aa ? aa + bs2*ai[brow] : NULL; /* mustn't add to NULL, that is UB */
1781     nrow = ailen[brow];
1782     for (l=0; l<n; l++) { /* loop over columns */
1783       if (in[l] < 0) {v++; continue;} /* negative column */
1784       PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %" PetscInt_FMT " too large", in[l]);
1785       col  = in[l];
1786       bcol = col/bs;
1787       cidx = col%bs;
1788       ridx = row%bs;
1789       high = nrow;
1790       low  = 0; /* assume unsorted */
1791       while (high-low > 5) {
1792         t = (low+high)/2;
1793         if (rp[t] > bcol) high = t;
1794         else             low  = t;
1795       }
1796       for (i=low; i<high; i++) {
1797         if (rp[i] > bcol) break;
1798         if (rp[i] == bcol) {
1799           *v++ = ap[bs2*i+bs*cidx+ridx];
1800           goto finished;
1801         }
1802       }
1803       *v++ = 0.0;
1804 finished:;
1805     }
1806   }
1807   PetscFunctionReturn(0);
1808 }
1809 
1810 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1811 {
1812   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1813   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1814   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1815   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1816   PetscBool         roworiented=a->roworiented;
1817   const PetscScalar *value     = v;
1818   MatScalar         *ap=NULL,*aa = a->a,*bap;
1819 
1820   PetscFunctionBegin;
1821   if (roworiented) {
1822     stepval = (n-1)*bs;
1823   } else {
1824     stepval = (m-1)*bs;
1825   }
1826   for (k=0; k<m; k++) { /* loop over added rows */
1827     row = im[k];
1828     if (row < 0) continue;
1829     PetscCheck(row < a->mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT,row,a->mbs-1);
1830     rp   = aj + ai[row];
1831     if (!A->structure_only) ap = aa + bs2*ai[row];
1832     rmax = imax[row];
1833     nrow = ailen[row];
1834     low  = 0;
1835     high = nrow;
1836     for (l=0; l<n; l++) { /* loop over added columns */
1837       if (in[l] < 0) continue;
1838       PetscCheck(in[l] < a->nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT,in[l],a->nbs-1);
1839       col = in[l];
1840       if (!A->structure_only) {
1841         if (roworiented) {
1842           value = v + (k*(stepval+bs) + l)*bs;
1843         } else {
1844           value = v + (l*(stepval+bs) + k)*bs;
1845         }
1846       }
1847       if (col <= lastcol) low = 0;
1848       else high = nrow;
1849       lastcol = col;
1850       while (high-low > 7) {
1851         t = (low+high)/2;
1852         if (rp[t] > col) high = t;
1853         else             low  = t;
1854       }
1855       for (i=low; i<high; i++) {
1856         if (rp[i] > col) break;
1857         if (rp[i] == col) {
1858           if (A->structure_only) goto noinsert2;
1859           bap = ap +  bs2*i;
1860           if (roworiented) {
1861             if (is == ADD_VALUES) {
1862               for (ii=0; ii<bs; ii++,value+=stepval) {
1863                 for (jj=ii; jj<bs2; jj+=bs) {
1864                   bap[jj] += *value++;
1865                 }
1866               }
1867             } else {
1868               for (ii=0; ii<bs; ii++,value+=stepval) {
1869                 for (jj=ii; jj<bs2; jj+=bs) {
1870                   bap[jj] = *value++;
1871                 }
1872               }
1873             }
1874           } else {
1875             if (is == ADD_VALUES) {
1876               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1877                 for (jj=0; jj<bs; jj++) {
1878                   bap[jj] += value[jj];
1879                 }
1880                 bap += bs;
1881               }
1882             } else {
1883               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1884                 for (jj=0; jj<bs; jj++) {
1885                   bap[jj]  = value[jj];
1886                 }
1887                 bap += bs;
1888               }
1889             }
1890           }
1891           goto noinsert2;
1892         }
1893       }
1894       if (nonew == 1) goto noinsert2;
1895       PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
1896       if (A->structure_only) {
1897         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1898       } else {
1899         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1900       }
1901       N = nrow++ - 1; high++;
1902       /* shift up all the later entries in this row */
1903       PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
1904       rp[i] = col;
1905       if (!A->structure_only) {
1906         PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
1907         bap   = ap +  bs2*i;
1908         if (roworiented) {
1909           for (ii=0; ii<bs; ii++,value+=stepval) {
1910             for (jj=ii; jj<bs2; jj+=bs) {
1911               bap[jj] = *value++;
1912             }
1913           }
1914         } else {
1915           for (ii=0; ii<bs; ii++,value+=stepval) {
1916             for (jj=0; jj<bs; jj++) {
1917               *bap++ = *value++;
1918             }
1919           }
1920         }
1921       }
1922 noinsert2:;
1923       low = i;
1924     }
1925     ailen[row] = nrow;
1926   }
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1931 {
1932   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1933   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1934   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1935   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1936   MatScalar      *aa  = a->a,*ap;
1937   PetscReal      ratio=0.6;
1938 
1939   PetscFunctionBegin;
1940   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1941 
1942   if (m) rmax = ailen[0];
1943   for (i=1; i<mbs; i++) {
1944     /* move each row back by the amount of empty slots (fshift) before it*/
1945     fshift += imax[i-1] - ailen[i-1];
1946     rmax    = PetscMax(rmax,ailen[i]);
1947     if (fshift) {
1948       ip = aj + ai[i];
1949       ap = aa + bs2*ai[i];
1950       N  = ailen[i];
1951       PetscCall(PetscArraymove(ip-fshift,ip,N));
1952       if (!A->structure_only) {
1953         PetscCall(PetscArraymove(ap-bs2*fshift,ap,bs2*N));
1954       }
1955     }
1956     ai[i] = ai[i-1] + ailen[i-1];
1957   }
1958   if (mbs) {
1959     fshift += imax[mbs-1] - ailen[mbs-1];
1960     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1961   }
1962 
1963   /* reset ilen and imax for each row */
1964   a->nonzerorowcnt = 0;
1965   if (A->structure_only) {
1966     PetscCall(PetscFree2(a->imax,a->ilen));
1967   } else { /* !A->structure_only */
1968     for (i=0; i<mbs; i++) {
1969       ailen[i] = imax[i] = ai[i+1] - ai[i];
1970       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1971     }
1972   }
1973   a->nz = ai[mbs];
1974 
1975   /* diagonals may have moved, so kill the diagonal pointers */
1976   a->idiagvalid = PETSC_FALSE;
1977   if (fshift && a->diag) {
1978     PetscCall(PetscFree(a->diag));
1979     PetscCall(PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt)));
1980     a->diag = NULL;
1981   }
1982   if (fshift) PetscCheck(a->nounused != -1,PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1983   PetscCall(PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2));
1984   PetscCall(PetscInfo(A,"Number of mallocs during MatSetValues is %" PetscInt_FMT "\n",a->reallocs));
1985   PetscCall(PetscInfo(A,"Most nonzeros blocks in any row is %" PetscInt_FMT "\n",rmax));
1986 
1987   A->info.mallocs    += a->reallocs;
1988   a->reallocs         = 0;
1989   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1990   a->rmax             = rmax;
1991 
1992   if (!A->structure_only) {
1993     PetscCall(MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio));
1994   }
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 /*
1999    This function returns an array of flags which indicate the locations of contiguous
2000    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2001    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2002    Assume: sizes should be long enough to hold all the values.
2003 */
2004 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2005 {
2006   PetscInt  i,j,k,row;
2007   PetscBool flg;
2008 
2009   PetscFunctionBegin;
2010   for (i=0,j=0; i<n; j++) {
2011     row = idx[i];
2012     if (row%bs!=0) { /* Not the beginning of a block */
2013       sizes[j] = 1;
2014       i++;
2015     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2016       sizes[j] = 1;         /* Also makes sure at least 'bs' values exist for next else */
2017       i++;
2018     } else { /* Beginning of the block, so check if the complete block exists */
2019       flg = PETSC_TRUE;
2020       for (k=1; k<bs; k++) {
2021         if (row+k != idx[i+k]) { /* break in the block */
2022           flg = PETSC_FALSE;
2023           break;
2024         }
2025       }
2026       if (flg) { /* No break in the bs */
2027         sizes[j] = bs;
2028         i       += bs;
2029       } else {
2030         sizes[j] = 1;
2031         i++;
2032       }
2033     }
2034   }
2035   *bs_max = j;
2036   PetscFunctionReturn(0);
2037 }
2038 
2039 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2040 {
2041   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2042   PetscInt          i,j,k,count,*rows;
2043   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2044   PetscScalar       zero = 0.0;
2045   MatScalar         *aa;
2046   const PetscScalar *xx;
2047   PetscScalar       *bb;
2048 
2049   PetscFunctionBegin;
2050   /* fix right hand side if needed */
2051   if (x && b) {
2052     PetscCall(VecGetArrayRead(x,&xx));
2053     PetscCall(VecGetArray(b,&bb));
2054     for (i=0; i<is_n; i++) {
2055       bb[is_idx[i]] = diag*xx[is_idx[i]];
2056     }
2057     PetscCall(VecRestoreArrayRead(x,&xx));
2058     PetscCall(VecRestoreArray(b,&bb));
2059   }
2060 
2061   /* Make a copy of the IS and  sort it */
2062   /* allocate memory for rows,sizes */
2063   PetscCall(PetscMalloc2(is_n,&rows,2*is_n,&sizes));
2064 
2065   /* copy IS values to rows, and sort them */
2066   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2067   PetscCall(PetscSortInt(is_n,rows));
2068 
2069   if (baij->keepnonzeropattern) {
2070     for (i=0; i<is_n; i++) sizes[i] = 1;
2071     bs_max          = is_n;
2072   } else {
2073     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max));
2074     A->nonzerostate++;
2075   }
2076 
2077   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2078     row = rows[j];
2079     PetscCheck(row >= 0 && row <= A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",row);
2080     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2081     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2082     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2083       if (diag != (PetscScalar)0.0) {
2084         if (baij->ilen[row/bs] > 0) {
2085           baij->ilen[row/bs]       = 1;
2086           baij->j[baij->i[row/bs]] = row/bs;
2087 
2088           PetscCall(PetscArrayzero(aa,count*bs));
2089         }
2090         /* Now insert all the diagonal values for this bs */
2091         for (k=0; k<bs; k++) {
2092           PetscCall((*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES));
2093         }
2094       } else { /* (diag == 0.0) */
2095         baij->ilen[row/bs] = 0;
2096       } /* end (diag == 0.0) */
2097     } else { /* (sizes[i] != bs) */
2098       PetscAssert(sizes[i] == 1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2099       for (k=0; k<count; k++) {
2100         aa[0] =  zero;
2101         aa   += bs;
2102       }
2103       if (diag != (PetscScalar)0.0) {
2104         PetscCall((*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES));
2105       }
2106     }
2107   }
2108 
2109   PetscCall(PetscFree2(rows,sizes));
2110   PetscCall(MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY));
2111   PetscFunctionReturn(0);
2112 }
2113 
2114 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2115 {
2116   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2117   PetscInt          i,j,k,count;
2118   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2119   PetscScalar       zero = 0.0;
2120   MatScalar         *aa;
2121   const PetscScalar *xx;
2122   PetscScalar       *bb;
2123   PetscBool         *zeroed,vecs = PETSC_FALSE;
2124 
2125   PetscFunctionBegin;
2126   /* fix right hand side if needed */
2127   if (x && b) {
2128     PetscCall(VecGetArrayRead(x,&xx));
2129     PetscCall(VecGetArray(b,&bb));
2130     vecs = PETSC_TRUE;
2131   }
2132 
2133   /* zero the columns */
2134   PetscCall(PetscCalloc1(A->rmap->n,&zeroed));
2135   for (i=0; i<is_n; i++) {
2136     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",is_idx[i]);
2137     zeroed[is_idx[i]] = PETSC_TRUE;
2138   }
2139   for (i=0; i<A->rmap->N; i++) {
2140     if (!zeroed[i]) {
2141       row = i/bs;
2142       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2143         for (k=0; k<bs; k++) {
2144           col = bs*baij->j[j] + k;
2145           if (zeroed[col]) {
2146             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2147             if (vecs) bb[i] -= aa[0]*xx[col];
2148             aa[0] = 0.0;
2149           }
2150         }
2151       }
2152     } else if (vecs) bb[i] = diag*xx[i];
2153   }
2154   PetscCall(PetscFree(zeroed));
2155   if (vecs) {
2156     PetscCall(VecRestoreArrayRead(x,&xx));
2157     PetscCall(VecRestoreArray(b,&bb));
2158   }
2159 
2160   /* zero the rows */
2161   for (i=0; i<is_n; i++) {
2162     row   = is_idx[i];
2163     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2164     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2165     for (k=0; k<count; k++) {
2166       aa[0] =  zero;
2167       aa   += bs;
2168     }
2169     if (diag != (PetscScalar)0.0) {
2170       PetscCall((*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES));
2171     }
2172   }
2173   PetscCall(MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY));
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2178 {
2179   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2180   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2181   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2182   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2183   PetscInt       ridx,cidx,bs2=a->bs2;
2184   PetscBool      roworiented=a->roworiented;
2185   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2186 
2187   PetscFunctionBegin;
2188   for (k=0; k<m; k++) { /* loop over added rows */
2189     row  = im[k];
2190     brow = row/bs;
2191     if (row < 0) continue;
2192     PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->N-1);
2193     rp   = aj + ai[brow];
2194     if (!A->structure_only) ap = aa + bs2*ai[brow];
2195     rmax = imax[brow];
2196     nrow = ailen[brow];
2197     low  = 0;
2198     high = nrow;
2199     for (l=0; l<n; l++) { /* loop over added columns */
2200       if (in[l] < 0) continue;
2201       PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[l],A->cmap->n-1);
2202       col  = in[l]; bcol = col/bs;
2203       ridx = row % bs; cidx = col % bs;
2204       if (!A->structure_only) {
2205         if (roworiented) {
2206           value = v[l + k*n];
2207         } else {
2208           value = v[k + l*m];
2209         }
2210       }
2211       if (col <= lastcol) low = 0; else high = nrow;
2212       lastcol = col;
2213       while (high-low > 7) {
2214         t = (low+high)/2;
2215         if (rp[t] > bcol) high = t;
2216         else              low  = t;
2217       }
2218       for (i=low; i<high; i++) {
2219         if (rp[i] > bcol) break;
2220         if (rp[i] == bcol) {
2221           bap = ap +  bs2*i + bs*cidx + ridx;
2222           if (!A->structure_only) {
2223             if (is == ADD_VALUES) *bap += value;
2224             else                  *bap  = value;
2225           }
2226           goto noinsert1;
2227         }
2228       }
2229       if (nonew == 1) goto noinsert1;
2230       PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2231       if (A->structure_only) {
2232         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2233       } else {
2234         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2235       }
2236       N = nrow++ - 1; high++;
2237       /* shift up all the later entries in this row */
2238       PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
2239       rp[i] = bcol;
2240       if (!A->structure_only) {
2241         PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
2242         PetscCall(PetscArrayzero(ap+bs2*i,bs2));
2243         ap[bs2*i + bs*cidx + ridx] = value;
2244       }
2245       a->nz++;
2246       A->nonzerostate++;
2247 noinsert1:;
2248       low = i;
2249     }
2250     ailen[brow] = nrow;
2251   }
2252   PetscFunctionReturn(0);
2253 }
2254 
2255 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2256 {
2257   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2258   Mat            outA;
2259   PetscBool      row_identity,col_identity;
2260 
2261   PetscFunctionBegin;
2262   PetscCheck(info->levels == 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2263   PetscCall(ISIdentity(row,&row_identity));
2264   PetscCall(ISIdentity(col,&col_identity));
2265   PetscCheck(row_identity && col_identity,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2266 
2267   outA            = inA;
2268   inA->factortype = MAT_FACTOR_LU;
2269   PetscCall(PetscFree(inA->solvertype));
2270   PetscCall(PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype));
2271 
2272   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2273 
2274   PetscCall(PetscObjectReference((PetscObject)row));
2275   PetscCall(ISDestroy(&a->row));
2276   a->row = row;
2277   PetscCall(PetscObjectReference((PetscObject)col));
2278   PetscCall(ISDestroy(&a->col));
2279   a->col = col;
2280 
2281   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2282   PetscCall(ISDestroy(&a->icol));
2283   PetscCall(ISInvertPermutation(col,PETSC_DECIDE,&a->icol));
2284   PetscCall(PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol));
2285 
2286   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity)));
2287   if (!a->solve_work) {
2288     PetscCall(PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work));
2289     PetscCall(PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar)));
2290   }
2291   PetscCall(MatLUFactorNumeric(outA,inA,info));
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2296 {
2297   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2298   PetscInt    i,nz,mbs;
2299 
2300   PetscFunctionBegin;
2301   nz  = baij->maxnz;
2302   mbs = baij->mbs;
2303   for (i=0; i<nz; i++) {
2304     baij->j[i] = indices[i];
2305   }
2306   baij->nz = nz;
2307   for (i=0; i<mbs; i++) {
2308     baij->ilen[i] = baij->imax[i];
2309   }
2310   PetscFunctionReturn(0);
2311 }
2312 
2313 /*@
2314     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2315        in the matrix.
2316 
2317   Input Parameters:
2318 +  mat - the SeqBAIJ matrix
2319 -  indices - the column indices
2320 
2321   Level: advanced
2322 
2323   Notes:
2324     This can be called if you have precomputed the nonzero structure of the
2325   matrix and want to provide it to the matrix object to improve the performance
2326   of the MatSetValues() operation.
2327 
2328     You MUST have set the correct numbers of nonzeros per row in the call to
2329   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2330 
2331     MUST be called before any calls to MatSetValues();
2332 
2333 @*/
2334 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2335 {
2336   PetscFunctionBegin;
2337   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2338   PetscValidIntPointer(indices,2);
2339   PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));
2340   PetscFunctionReturn(0);
2341 }
2342 
2343 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2344 {
2345   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2346   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2347   PetscReal      atmp;
2348   PetscScalar    *x,zero = 0.0;
2349   MatScalar      *aa;
2350   PetscInt       ncols,brow,krow,kcol;
2351 
2352   PetscFunctionBegin;
2353   /* why is this not a macro???????????????????????????????????????????????????????????????? */
2354   PetscCheck(!A->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2355   bs  = A->rmap->bs;
2356   aa  = a->a;
2357   ai  = a->i;
2358   aj  = a->j;
2359   mbs = a->mbs;
2360 
2361   PetscCall(VecSet(v,zero));
2362   PetscCall(VecGetArray(v,&x));
2363   PetscCall(VecGetLocalSize(v,&n));
2364   PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2365   for (i=0; i<mbs; i++) {
2366     ncols = ai[1] - ai[0]; ai++;
2367     brow  = bs*i;
2368     for (j=0; j<ncols; j++) {
2369       for (kcol=0; kcol<bs; kcol++) {
2370         for (krow=0; krow<bs; krow++) {
2371           atmp = PetscAbsScalar(*aa);aa++;
2372           row  = brow + krow;   /* row index */
2373           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2374         }
2375       }
2376       aj++;
2377     }
2378   }
2379   PetscCall(VecRestoreArray(v,&x));
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2384 {
2385   PetscFunctionBegin;
2386   /* If the two matrices have the same copy implementation, use fast copy. */
2387   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2388     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2389     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2390     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2391 
2392     PetscCheck(a->i[ambs] == b->i[bmbs],PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",a->i[ambs],b->i[bmbs]);
2393     PetscCheck(abs == bbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",abs,bbs);
2394     PetscCall(PetscArraycpy(b->a,a->a,bs2*a->i[ambs]));
2395     PetscCall(PetscObjectStateIncrease((PetscObject)B));
2396   } else {
2397     PetscCall(MatCopy_Basic(A,B,str));
2398   }
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2403 {
2404   PetscFunctionBegin;
2405   PetscCall(MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL));
2406   PetscFunctionReturn(0);
2407 }
2408 
2409 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2410 {
2411   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2412 
2413   PetscFunctionBegin;
2414   *array = a->a;
2415   PetscFunctionReturn(0);
2416 }
2417 
2418 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2419 {
2420   PetscFunctionBegin;
2421   *array = NULL;
2422   PetscFunctionReturn(0);
2423 }
2424 
2425 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2426 {
2427   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2428   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2429   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2430 
2431   PetscFunctionBegin;
2432   /* Set the number of nonzeros in the new matrix */
2433   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz));
2434   PetscFunctionReturn(0);
2435 }
2436 
2437 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2438 {
2439   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2440   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2441   PetscBLASInt   one=1;
2442 
2443   PetscFunctionBegin;
2444   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2445     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2446     if (e) {
2447       PetscCall(PetscArraycmp(x->i,y->i,x->mbs+1,&e));
2448       if (e) {
2449         PetscCall(PetscArraycmp(x->j,y->j,x->i[x->mbs],&e));
2450         if (e) str = SAME_NONZERO_PATTERN;
2451       }
2452     }
2453     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"MatStructure is not SAME_NONZERO_PATTERN");
2454   }
2455   if (str == SAME_NONZERO_PATTERN) {
2456     PetscScalar  alpha = a;
2457     PetscBLASInt bnz;
2458     PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz));
2459     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2460     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2461   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2462     PetscCall(MatAXPY_Basic(Y,a,X,str));
2463   } else {
2464     Mat      B;
2465     PetscInt *nnz;
2466     PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2467     PetscCall(PetscMalloc1(Y->rmap->N,&nnz));
2468     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
2469     PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
2470     PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N));
2471     PetscCall(MatSetBlockSizesFromMats(B,Y,Y));
2472     PetscCall(MatSetType(B,(MatType) ((PetscObject)Y)->type_name));
2473     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz));
2474     PetscCall(MatSeqBAIJSetPreallocation(B,bs,0,nnz));
2475     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
2476     PetscCall(MatHeaderMerge(Y,&B));
2477     PetscCall(PetscFree(nnz));
2478   }
2479   PetscFunctionReturn(0);
2480 }
2481 
2482 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2483 {
2484 #if defined(PETSC_USE_COMPLEX)
2485   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2486   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2487   MatScalar   *aa = a->a;
2488 
2489   PetscFunctionBegin;
2490   for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]);
2491 #else
2492   PetscFunctionBegin;
2493 #endif
2494   PetscFunctionReturn(0);
2495 }
2496 
2497 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2498 {
2499   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2500   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2501   MatScalar   *aa = a->a;
2502 
2503   PetscFunctionBegin;
2504   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2505   PetscFunctionReturn(0);
2506 }
2507 
2508 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2509 {
2510   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2511   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2512   MatScalar   *aa = a->a;
2513 
2514   PetscFunctionBegin;
2515   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2516   PetscFunctionReturn(0);
2517 }
2518 
2519 /*
2520     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2521 */
2522 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2523 {
2524   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2525   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2526   PetscInt       nz = a->i[m],row,*jj,mr,col;
2527 
2528   PetscFunctionBegin;
2529   *nn = n;
2530   if (!ia) PetscFunctionReturn(0);
2531   PetscCheck(!symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2532   PetscCall(PetscCalloc1(n,&collengths));
2533   PetscCall(PetscMalloc1(n+1,&cia));
2534   PetscCall(PetscMalloc1(nz,&cja));
2535   jj   = a->j;
2536   for (i=0; i<nz; i++) {
2537     collengths[jj[i]]++;
2538   }
2539   cia[0] = oshift;
2540   for (i=0; i<n; i++) {
2541     cia[i+1] = cia[i] + collengths[i];
2542   }
2543   PetscCall(PetscArrayzero(collengths,n));
2544   jj   = a->j;
2545   for (row=0; row<m; row++) {
2546     mr = a->i[row+1] - a->i[row];
2547     for (i=0; i<mr; i++) {
2548       col = *jj++;
2549 
2550       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2551     }
2552   }
2553   PetscCall(PetscFree(collengths));
2554   *ia  = cia; *ja = cja;
2555   PetscFunctionReturn(0);
2556 }
2557 
2558 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2559 {
2560   PetscFunctionBegin;
2561   if (!ia) PetscFunctionReturn(0);
2562   PetscCall(PetscFree(*ia));
2563   PetscCall(PetscFree(*ja));
2564   PetscFunctionReturn(0);
2565 }
2566 
2567 /*
2568  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2569  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2570  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2571  */
2572 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2573 {
2574   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2575   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2576   PetscInt       nz = a->i[m],row,*jj,mr,col;
2577   PetscInt       *cspidx;
2578 
2579   PetscFunctionBegin;
2580   *nn = n;
2581   if (!ia) PetscFunctionReturn(0);
2582 
2583   PetscCall(PetscCalloc1(n,&collengths));
2584   PetscCall(PetscMalloc1(n+1,&cia));
2585   PetscCall(PetscMalloc1(nz,&cja));
2586   PetscCall(PetscMalloc1(nz,&cspidx));
2587   jj   = a->j;
2588   for (i=0; i<nz; i++) {
2589     collengths[jj[i]]++;
2590   }
2591   cia[0] = oshift;
2592   for (i=0; i<n; i++) {
2593     cia[i+1] = cia[i] + collengths[i];
2594   }
2595   PetscCall(PetscArrayzero(collengths,n));
2596   jj   = a->j;
2597   for (row=0; row<m; row++) {
2598     mr = a->i[row+1] - a->i[row];
2599     for (i=0; i<mr; i++) {
2600       col = *jj++;
2601       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2602       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2603     }
2604   }
2605   PetscCall(PetscFree(collengths));
2606   *ia    = cia;
2607   *ja    = cja;
2608   *spidx = cspidx;
2609   PetscFunctionReturn(0);
2610 }
2611 
2612 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2613 {
2614   PetscFunctionBegin;
2615   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done));
2616   PetscCall(PetscFree(*spidx));
2617   PetscFunctionReturn(0);
2618 }
2619 
2620 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2621 {
2622   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2623 
2624   PetscFunctionBegin;
2625   if (!Y->preallocated || !aij->nz) {
2626     PetscCall(MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL));
2627   }
2628   PetscCall(MatShift_Basic(Y,a));
2629   PetscFunctionReturn(0);
2630 }
2631 
2632 /* -------------------------------------------------------------------*/
2633 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2634                                        MatGetRow_SeqBAIJ,
2635                                        MatRestoreRow_SeqBAIJ,
2636                                        MatMult_SeqBAIJ_N,
2637                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2638                                        MatMultTranspose_SeqBAIJ,
2639                                        MatMultTransposeAdd_SeqBAIJ,
2640                                        NULL,
2641                                        NULL,
2642                                        NULL,
2643                                /* 10*/ NULL,
2644                                        MatLUFactor_SeqBAIJ,
2645                                        NULL,
2646                                        NULL,
2647                                        MatTranspose_SeqBAIJ,
2648                                /* 15*/ MatGetInfo_SeqBAIJ,
2649                                        MatEqual_SeqBAIJ,
2650                                        MatGetDiagonal_SeqBAIJ,
2651                                        MatDiagonalScale_SeqBAIJ,
2652                                        MatNorm_SeqBAIJ,
2653                                /* 20*/ NULL,
2654                                        MatAssemblyEnd_SeqBAIJ,
2655                                        MatSetOption_SeqBAIJ,
2656                                        MatZeroEntries_SeqBAIJ,
2657                                /* 24*/ MatZeroRows_SeqBAIJ,
2658                                        NULL,
2659                                        NULL,
2660                                        NULL,
2661                                        NULL,
2662                                /* 29*/ MatSetUp_SeqBAIJ,
2663                                        NULL,
2664                                        NULL,
2665                                        NULL,
2666                                        NULL,
2667                                /* 34*/ MatDuplicate_SeqBAIJ,
2668                                        NULL,
2669                                        NULL,
2670                                        MatILUFactor_SeqBAIJ,
2671                                        NULL,
2672                                /* 39*/ MatAXPY_SeqBAIJ,
2673                                        MatCreateSubMatrices_SeqBAIJ,
2674                                        MatIncreaseOverlap_SeqBAIJ,
2675                                        MatGetValues_SeqBAIJ,
2676                                        MatCopy_SeqBAIJ,
2677                                /* 44*/ NULL,
2678                                        MatScale_SeqBAIJ,
2679                                        MatShift_SeqBAIJ,
2680                                        NULL,
2681                                        MatZeroRowsColumns_SeqBAIJ,
2682                                /* 49*/ NULL,
2683                                        MatGetRowIJ_SeqBAIJ,
2684                                        MatRestoreRowIJ_SeqBAIJ,
2685                                        MatGetColumnIJ_SeqBAIJ,
2686                                        MatRestoreColumnIJ_SeqBAIJ,
2687                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2688                                        NULL,
2689                                        NULL,
2690                                        NULL,
2691                                        MatSetValuesBlocked_SeqBAIJ,
2692                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2693                                        MatDestroy_SeqBAIJ,
2694                                        MatView_SeqBAIJ,
2695                                        NULL,
2696                                        NULL,
2697                                /* 64*/ NULL,
2698                                        NULL,
2699                                        NULL,
2700                                        NULL,
2701                                        NULL,
2702                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2703                                        NULL,
2704                                        MatConvert_Basic,
2705                                        NULL,
2706                                        NULL,
2707                                /* 74*/ NULL,
2708                                        MatFDColoringApply_BAIJ,
2709                                        NULL,
2710                                        NULL,
2711                                        NULL,
2712                                /* 79*/ NULL,
2713                                        NULL,
2714                                        NULL,
2715                                        NULL,
2716                                        MatLoad_SeqBAIJ,
2717                                /* 84*/ NULL,
2718                                        NULL,
2719                                        NULL,
2720                                        NULL,
2721                                        NULL,
2722                                /* 89*/ NULL,
2723                                        NULL,
2724                                        NULL,
2725                                        NULL,
2726                                        NULL,
2727                                /* 94*/ NULL,
2728                                        NULL,
2729                                        NULL,
2730                                        NULL,
2731                                        NULL,
2732                                /* 99*/ NULL,
2733                                        NULL,
2734                                        NULL,
2735                                        MatConjugate_SeqBAIJ,
2736                                        NULL,
2737                                /*104*/ NULL,
2738                                        MatRealPart_SeqBAIJ,
2739                                        MatImaginaryPart_SeqBAIJ,
2740                                        NULL,
2741                                        NULL,
2742                                /*109*/ NULL,
2743                                        NULL,
2744                                        NULL,
2745                                        NULL,
2746                                        MatMissingDiagonal_SeqBAIJ,
2747                                /*114*/ NULL,
2748                                        NULL,
2749                                        NULL,
2750                                        NULL,
2751                                        NULL,
2752                                /*119*/ NULL,
2753                                        NULL,
2754                                        MatMultHermitianTranspose_SeqBAIJ,
2755                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2756                                        NULL,
2757                                /*124*/ NULL,
2758                                        MatGetColumnReductions_SeqBAIJ,
2759                                        MatInvertBlockDiagonal_SeqBAIJ,
2760                                        NULL,
2761                                        NULL,
2762                                /*129*/ NULL,
2763                                        NULL,
2764                                        NULL,
2765                                        NULL,
2766                                        NULL,
2767                                /*134*/ NULL,
2768                                        NULL,
2769                                        NULL,
2770                                        NULL,
2771                                        NULL,
2772                                /*139*/ MatSetBlockSizes_Default,
2773                                        NULL,
2774                                        NULL,
2775                                        MatFDColoringSetUp_SeqXAIJ,
2776                                        NULL,
2777                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2778                                        MatDestroySubMatrices_SeqBAIJ,
2779                                        NULL,
2780                                        NULL,
2781                                        NULL,
2782                                        NULL
2783 };
2784 
2785 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2786 {
2787   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2788   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2789 
2790   PetscFunctionBegin;
2791   PetscCheck(aij->nonew == 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2792 
2793   /* allocate space for values if not already there */
2794   if (!aij->saved_values) {
2795     PetscCall(PetscMalloc1(nz+1,&aij->saved_values));
2796     PetscCall(PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar)));
2797   }
2798 
2799   /* copy values over */
2800   PetscCall(PetscArraycpy(aij->saved_values,aij->a,nz));
2801   PetscFunctionReturn(0);
2802 }
2803 
2804 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2805 {
2806   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2807   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2808 
2809   PetscFunctionBegin;
2810   PetscCheck(aij->nonew == 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2811   PetscCheck(aij->saved_values,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2812 
2813   /* copy values over */
2814   PetscCall(PetscArraycpy(aij->a,aij->saved_values,nz));
2815   PetscFunctionReturn(0);
2816 }
2817 
2818 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2819 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2820 
2821 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2822 {
2823   Mat_SeqBAIJ    *b;
2824   PetscInt       i,mbs,nbs,bs2;
2825   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2826 
2827   PetscFunctionBegin;
2828   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2829   if (nz == MAT_SKIP_ALLOCATION) {
2830     skipallocation = PETSC_TRUE;
2831     nz             = 0;
2832   }
2833 
2834   PetscCall(MatSetBlockSize(B,PetscAbs(bs)));
2835   PetscCall(PetscLayoutSetUp(B->rmap));
2836   PetscCall(PetscLayoutSetUp(B->cmap));
2837   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
2838 
2839   B->preallocated = PETSC_TRUE;
2840 
2841   mbs = B->rmap->n/bs;
2842   nbs = B->cmap->n/bs;
2843   bs2 = bs*bs;
2844 
2845   PetscCheck(mbs*bs==B->rmap->n && nbs*bs==B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->N,B->cmap->n,bs);
2846 
2847   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2848   PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %" PetscInt_FMT,nz);
2849   if (nnz) {
2850     for (i=0; i<mbs; i++) {
2851       PetscCheck(nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,nnz[i]);
2852       PetscCheck(nnz[i] <= nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,nnz[i],nbs);
2853     }
2854   }
2855 
2856   b    = (Mat_SeqBAIJ*)B->data;
2857   PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2858   PetscCall(PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL));
2859   PetscOptionsEnd();
2860 
2861   if (!flg) {
2862     switch (bs) {
2863     case 1:
2864       B->ops->mult    = MatMult_SeqBAIJ_1;
2865       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2866       break;
2867     case 2:
2868       B->ops->mult    = MatMult_SeqBAIJ_2;
2869       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2870       break;
2871     case 3:
2872       B->ops->mult    = MatMult_SeqBAIJ_3;
2873       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2874       break;
2875     case 4:
2876       B->ops->mult    = MatMult_SeqBAIJ_4;
2877       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2878       break;
2879     case 5:
2880       B->ops->mult    = MatMult_SeqBAIJ_5;
2881       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2882       break;
2883     case 6:
2884       B->ops->mult    = MatMult_SeqBAIJ_6;
2885       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2886       break;
2887     case 7:
2888       B->ops->mult    = MatMult_SeqBAIJ_7;
2889       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2890       break;
2891     case 9:
2892     {
2893       PetscInt version = 1;
2894       PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL));
2895       switch (version) {
2896 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2897       case 1:
2898         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2899         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2900         PetscCall(PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2901         break;
2902 #endif
2903       default:
2904         B->ops->mult    = MatMult_SeqBAIJ_N;
2905         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2906         PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2907         break;
2908       }
2909       break;
2910     }
2911     case 11:
2912       B->ops->mult    = MatMult_SeqBAIJ_11;
2913       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2914       break;
2915     case 12:
2916     {
2917       PetscInt version = 1;
2918       PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL));
2919       switch (version) {
2920       case 1:
2921         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
2922         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2923         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2924         break;
2925       case 2:
2926         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
2927         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
2928         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2929         break;
2930 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2931       case 3:
2932         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
2933         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2934         PetscCall(PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2935         break;
2936 #endif
2937       default:
2938         B->ops->mult    = MatMult_SeqBAIJ_N;
2939         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2940         PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2941         break;
2942       }
2943       break;
2944     }
2945     case 15:
2946     {
2947       PetscInt version = 1;
2948       PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL));
2949       switch (version) {
2950       case 1:
2951         B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2952         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2953         break;
2954       case 2:
2955         B->ops->mult    = MatMult_SeqBAIJ_15_ver2;
2956         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2957         break;
2958       case 3:
2959         B->ops->mult    = MatMult_SeqBAIJ_15_ver3;
2960         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2961         break;
2962       case 4:
2963         B->ops->mult    = MatMult_SeqBAIJ_15_ver4;
2964         PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs));
2965         break;
2966       default:
2967         B->ops->mult    = MatMult_SeqBAIJ_N;
2968         PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2969         break;
2970       }
2971       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2972       break;
2973     }
2974     default:
2975       B->ops->mult    = MatMult_SeqBAIJ_N;
2976       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2977       PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs));
2978       break;
2979     }
2980   }
2981   B->ops->sor = MatSOR_SeqBAIJ;
2982   b->mbs = mbs;
2983   b->nbs = nbs;
2984   if (!skipallocation) {
2985     if (!b->imax) {
2986       PetscCall(PetscMalloc2(mbs,&b->imax,mbs,&b->ilen));
2987       PetscCall(PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt)));
2988 
2989       b->free_imax_ilen = PETSC_TRUE;
2990     }
2991     /* b->ilen will count nonzeros in each block row so far. */
2992     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2993     if (!nnz) {
2994       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2995       else if (nz < 0) nz = 1;
2996       nz = PetscMin(nz,nbs);
2997       for (i=0; i<mbs; i++) b->imax[i] = nz;
2998       PetscCall(PetscIntMultError(nz,mbs,&nz));
2999     } else {
3000       PetscInt64 nz64 = 0;
3001       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
3002       PetscCall(PetscIntCast(nz64,&nz));
3003     }
3004 
3005     /* allocate the matrix space */
3006     PetscCall(MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i));
3007     if (B->structure_only) {
3008       PetscCall(PetscMalloc1(nz,&b->j));
3009       PetscCall(PetscMalloc1(B->rmap->N+1,&b->i));
3010       PetscCall(PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt)));
3011     } else {
3012       PetscInt nzbs2 = 0;
3013       PetscCall(PetscIntMultError(nz,bs2,&nzbs2));
3014       PetscCall(PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i));
3015       PetscCall(PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))));
3016       PetscCall(PetscArrayzero(b->a,nz*bs2));
3017     }
3018     PetscCall(PetscArrayzero(b->j,nz));
3019 
3020     if (B->structure_only) {
3021       b->singlemalloc = PETSC_FALSE;
3022       b->free_a       = PETSC_FALSE;
3023     } else {
3024       b->singlemalloc = PETSC_TRUE;
3025       b->free_a       = PETSC_TRUE;
3026     }
3027     b->free_ij = PETSC_TRUE;
3028 
3029     b->i[0] = 0;
3030     for (i=1; i<mbs+1; i++) {
3031       b->i[i] = b->i[i-1] + b->imax[i-1];
3032     }
3033 
3034   } else {
3035     b->free_a  = PETSC_FALSE;
3036     b->free_ij = PETSC_FALSE;
3037   }
3038 
3039   b->bs2              = bs2;
3040   b->mbs              = mbs;
3041   b->nz               = 0;
3042   b->maxnz            = nz;
3043   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3044   B->was_assembled    = PETSC_FALSE;
3045   B->assembled        = PETSC_FALSE;
3046   if (realalloc) PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
3047   PetscFunctionReturn(0);
3048 }
3049 
3050 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3051 {
3052   PetscInt       i,m,nz,nz_max=0,*nnz;
3053   PetscScalar    *values=NULL;
3054   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
3055 
3056   PetscFunctionBegin;
3057   PetscCheck(bs >= 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs);
3058   PetscCall(PetscLayoutSetBlockSize(B->rmap,bs));
3059   PetscCall(PetscLayoutSetBlockSize(B->cmap,bs));
3060   PetscCall(PetscLayoutSetUp(B->rmap));
3061   PetscCall(PetscLayoutSetUp(B->cmap));
3062   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
3063   m    = B->rmap->n/bs;
3064 
3065   PetscCheck(ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]);
3066   PetscCall(PetscMalloc1(m+1, &nnz));
3067   for (i=0; i<m; i++) {
3068     nz = ii[i+1]- ii[i];
3069     PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz);
3070     nz_max = PetscMax(nz_max, nz);
3071     nnz[i] = nz;
3072   }
3073   PetscCall(MatSeqBAIJSetPreallocation(B,bs,0,nnz));
3074   PetscCall(PetscFree(nnz));
3075 
3076   values = (PetscScalar*)V;
3077   if (!values) {
3078     PetscCall(PetscCalloc1(bs*bs*(nz_max+1),&values));
3079   }
3080   for (i=0; i<m; i++) {
3081     PetscInt          ncols  = ii[i+1] - ii[i];
3082     const PetscInt    *icols = jj + ii[i];
3083     if (bs == 1 || !roworiented) {
3084       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3085       PetscCall(MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES));
3086     } else {
3087       PetscInt j;
3088       for (j=0; j<ncols; j++) {
3089         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3090         PetscCall(MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES));
3091       }
3092     }
3093   }
3094   if (!V) PetscCall(PetscFree(values));
3095   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
3096   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
3097   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3098   PetscFunctionReturn(0);
3099 }
3100 
3101 /*@C
3102    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
3103 
3104    Not Collective
3105 
3106    Input Parameter:
3107 .  mat - a MATSEQBAIJ matrix
3108 
3109    Output Parameter:
3110 .   array - pointer to the data
3111 
3112    Level: intermediate
3113 
3114 .seealso: `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3115 @*/
3116 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3117 {
3118   PetscFunctionBegin;
3119   PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));
3120   PetscFunctionReturn(0);
3121 }
3122 
3123 /*@C
3124    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3125 
3126    Not Collective
3127 
3128    Input Parameters:
3129 +  mat - a MATSEQBAIJ matrix
3130 -  array - pointer to the data
3131 
3132    Level: intermediate
3133 
3134 .seealso: `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3135 @*/
3136 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3137 {
3138   PetscFunctionBegin;
3139   PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));
3140   PetscFunctionReturn(0);
3141 }
3142 
3143 /*MC
3144    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3145    block sparse compressed row format.
3146 
3147    Options Database Keys:
3148 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3149 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3150 
3151    Level: beginner
3152 
3153    Notes:
3154     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3155     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
3156 
3157    Run with -info to see what version of the matrix-vector product is being used
3158 
3159 .seealso: `MatCreateSeqBAIJ()`
3160 M*/
3161 
3162 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3163 
3164 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3165 {
3166   PetscMPIInt    size;
3167   Mat_SeqBAIJ    *b;
3168 
3169   PetscFunctionBegin;
3170   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
3171   PetscCheck(size == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3172 
3173   PetscCall(PetscNewLog(B,&b));
3174   B->data = (void*)b;
3175   PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
3176 
3177   b->row          = NULL;
3178   b->col          = NULL;
3179   b->icol         = NULL;
3180   b->reallocs     = 0;
3181   b->saved_values = NULL;
3182 
3183   b->roworiented        = PETSC_TRUE;
3184   b->nonew              = 0;
3185   b->diag               = NULL;
3186   B->spptr              = NULL;
3187   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3188   b->keepnonzeropattern = PETSC_FALSE;
3189 
3190   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ));
3191   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ));
3192   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ));
3193   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ));
3194   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ));
3195   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ));
3196   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ));
3197   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ));
3198   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3199   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ));
3200 #if defined(PETSC_HAVE_HYPRE)
3201   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE));
3202 #endif
3203   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS));
3204   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ));
3205   PetscFunctionReturn(0);
3206 }
3207 
3208 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3209 {
3210   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3211   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3212 
3213   PetscFunctionBegin;
3214   PetscCheck(a->i[mbs] == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3215 
3216   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3217     c->imax           = a->imax;
3218     c->ilen           = a->ilen;
3219     c->free_imax_ilen = PETSC_FALSE;
3220   } else {
3221     PetscCall(PetscMalloc2(mbs,&c->imax,mbs,&c->ilen));
3222     PetscCall(PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt)));
3223     for (i=0; i<mbs; i++) {
3224       c->imax[i] = a->imax[i];
3225       c->ilen[i] = a->ilen[i];
3226     }
3227     c->free_imax_ilen = PETSC_TRUE;
3228   }
3229 
3230   /* allocate the matrix space */
3231   if (mallocmatspace) {
3232     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3233       PetscCall(PetscCalloc1(bs2*nz,&c->a));
3234       PetscCall(PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar)));
3235 
3236       c->i            = a->i;
3237       c->j            = a->j;
3238       c->singlemalloc = PETSC_FALSE;
3239       c->free_a       = PETSC_TRUE;
3240       c->free_ij      = PETSC_FALSE;
3241       c->parent       = A;
3242       C->preallocated = PETSC_TRUE;
3243       C->assembled    = PETSC_TRUE;
3244 
3245       PetscCall(PetscObjectReference((PetscObject)A));
3246       PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3247       PetscCall(MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
3248     } else {
3249       PetscCall(PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i));
3250       PetscCall(PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt)));
3251 
3252       c->singlemalloc = PETSC_TRUE;
3253       c->free_a       = PETSC_TRUE;
3254       c->free_ij      = PETSC_TRUE;
3255 
3256       PetscCall(PetscArraycpy(c->i,a->i,mbs+1));
3257       if (mbs > 0) {
3258         PetscCall(PetscArraycpy(c->j,a->j,nz));
3259         if (cpvalues == MAT_COPY_VALUES) {
3260           PetscCall(PetscArraycpy(c->a,a->a,bs2*nz));
3261         } else {
3262           PetscCall(PetscArrayzero(c->a,bs2*nz));
3263         }
3264       }
3265       C->preallocated = PETSC_TRUE;
3266       C->assembled    = PETSC_TRUE;
3267     }
3268   }
3269 
3270   c->roworiented = a->roworiented;
3271   c->nonew       = a->nonew;
3272 
3273   PetscCall(PetscLayoutReference(A->rmap,&C->rmap));
3274   PetscCall(PetscLayoutReference(A->cmap,&C->cmap));
3275 
3276   c->bs2         = a->bs2;
3277   c->mbs         = a->mbs;
3278   c->nbs         = a->nbs;
3279 
3280   if (a->diag) {
3281     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3282       c->diag      = a->diag;
3283       c->free_diag = PETSC_FALSE;
3284     } else {
3285       PetscCall(PetscMalloc1(mbs+1,&c->diag));
3286       PetscCall(PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt)));
3287       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3288       c->free_diag = PETSC_TRUE;
3289     }
3290   } else c->diag = NULL;
3291 
3292   c->nz         = a->nz;
3293   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3294   c->solve_work = NULL;
3295   c->mult_work  = NULL;
3296   c->sor_workt  = NULL;
3297   c->sor_work   = NULL;
3298 
3299   c->compressedrow.use   = a->compressedrow.use;
3300   c->compressedrow.nrows = a->compressedrow.nrows;
3301   if (a->compressedrow.use) {
3302     i    = a->compressedrow.nrows;
3303     PetscCall(PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex));
3304     PetscCall(PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt)));
3305     PetscCall(PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1));
3306     PetscCall(PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i));
3307   } else {
3308     c->compressedrow.use    = PETSC_FALSE;
3309     c->compressedrow.i      = NULL;
3310     c->compressedrow.rindex = NULL;
3311   }
3312   C->nonzerostate = A->nonzerostate;
3313 
3314   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist));
3315   PetscFunctionReturn(0);
3316 }
3317 
3318 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3319 {
3320   PetscFunctionBegin;
3321   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),B));
3322   PetscCall(MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n));
3323   PetscCall(MatSetType(*B,MATSEQBAIJ));
3324   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE));
3325   PetscFunctionReturn(0);
3326 }
3327 
3328 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3329 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3330 {
3331   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3332   PetscInt       *rowidxs,*colidxs;
3333   PetscScalar    *matvals;
3334 
3335   PetscFunctionBegin;
3336   PetscCall(PetscViewerSetUp(viewer));
3337 
3338   /* read matrix header */
3339   PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
3340   PetscCheck(header[0] == MAT_FILE_CLASSID,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3341   M = header[1]; N = header[2]; nz = header[3];
3342   PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
3343   PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
3344   PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ");
3345 
3346   /* set block sizes from the viewer's .info file */
3347   PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
3348   /* set local and global sizes if not set already */
3349   if (mat->rmap->n < 0) mat->rmap->n = M;
3350   if (mat->cmap->n < 0) mat->cmap->n = N;
3351   if (mat->rmap->N < 0) mat->rmap->N = M;
3352   if (mat->cmap->N < 0) mat->cmap->N = N;
3353   PetscCall(PetscLayoutSetUp(mat->rmap));
3354   PetscCall(PetscLayoutSetUp(mat->cmap));
3355 
3356   /* check if the matrix sizes are correct */
3357   PetscCall(MatGetSize(mat,&rows,&cols));
3358   PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);
3359   PetscCall(MatGetBlockSize(mat,&bs));
3360   PetscCall(MatGetLocalSize(mat,&m,&n));
3361   mbs = m/bs; nbs = n/bs;
3362 
3363   /* read in row lengths, column indices and nonzero values */
3364   PetscCall(PetscMalloc1(m+1,&rowidxs));
3365   PetscCall(PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT));
3366   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3367   sum = rowidxs[m];
3368   PetscCheck(sum == nz,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
3369 
3370   /* read in column indices and nonzero values */
3371   PetscCall(PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals));
3372   PetscCall(PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT));
3373   PetscCall(PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR));
3374 
3375   { /* preallocate matrix storage */
3376     PetscBT   bt; /* helper bit set to count nonzeros */
3377     PetscInt  *nnz;
3378     PetscBool sbaij;
3379 
3380     PetscCall(PetscBTCreate(nbs,&bt));
3381     PetscCall(PetscCalloc1(mbs,&nnz));
3382     PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij));
3383     for (i=0; i<mbs; i++) {
3384       PetscCall(PetscBTMemzero(nbs,bt));
3385       for (k=0; k<bs; k++) {
3386         PetscInt row = bs*i + k;
3387         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3388           PetscInt col = colidxs[j];
3389           if (!sbaij || col >= row)
3390             if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3391         }
3392       }
3393     }
3394     PetscCall(PetscBTDestroy(&bt));
3395     PetscCall(MatSeqBAIJSetPreallocation(mat,bs,0,nnz));
3396     PetscCall(MatSeqSBAIJSetPreallocation(mat,bs,0,nnz));
3397     PetscCall(PetscFree(nnz));
3398   }
3399 
3400   /* store matrix values */
3401   for (i=0; i<m; i++) {
3402     PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3403     PetscCall((*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES));
3404   }
3405 
3406   PetscCall(PetscFree(rowidxs));
3407   PetscCall(PetscFree2(colidxs,matvals));
3408   PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
3409   PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
3410   PetscFunctionReturn(0);
3411 }
3412 
3413 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3414 {
3415   PetscBool isbinary;
3416 
3417   PetscFunctionBegin;
3418   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
3419   PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3420   PetscCall(MatLoad_SeqBAIJ_Binary(mat,viewer));
3421   PetscFunctionReturn(0);
3422 }
3423 
3424 /*@C
3425    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3426    compressed row) format.  For good matrix assembly performance the
3427    user should preallocate the matrix storage by setting the parameter nz
3428    (or the array nnz).  By setting these parameters accurately, performance
3429    during matrix assembly can be increased by more than a factor of 50.
3430 
3431    Collective
3432 
3433    Input Parameters:
3434 +  comm - MPI communicator, set to PETSC_COMM_SELF
3435 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3436           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3437 .  m - number of rows
3438 .  n - number of columns
3439 .  nz - number of nonzero blocks  per block row (same for all rows)
3440 -  nnz - array containing the number of nonzero blocks in the various block rows
3441          (possibly different for each block row) or NULL
3442 
3443    Output Parameter:
3444 .  A - the matrix
3445 
3446    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3447    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3448    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3449 
3450    Options Database Keys:
3451 +   -mat_no_unroll - uses code that does not unroll the loops in the
3452                      block calculations (much slower)
3453 -    -mat_block_size - size of the blocks to use
3454 
3455    Level: intermediate
3456 
3457    Notes:
3458    The number of rows and columns must be divisible by blocksize.
3459 
3460    If the nnz parameter is given then the nz parameter is ignored
3461 
3462    A nonzero block is any block that as 1 or more nonzeros in it
3463 
3464    The block AIJ format is fully compatible with standard Fortran 77
3465    storage.  That is, the stored row and column indices can begin at
3466    either one (as in Fortran) or zero.  See the users' manual for details.
3467 
3468    Specify the preallocated storage with either nz or nnz (not both).
3469    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3470    allocation.  See Users-Manual: ch_mat for details.
3471    matrices.
3472 
3473 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3474 @*/
3475 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3476 {
3477   PetscFunctionBegin;
3478   PetscCall(MatCreate(comm,A));
3479   PetscCall(MatSetSizes(*A,m,n,m,n));
3480   PetscCall(MatSetType(*A,MATSEQBAIJ));
3481   PetscCall(MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz));
3482   PetscFunctionReturn(0);
3483 }
3484 
3485 /*@C
3486    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3487    per row in the matrix. For good matrix assembly performance the
3488    user should preallocate the matrix storage by setting the parameter nz
3489    (or the array nnz).  By setting these parameters accurately, performance
3490    during matrix assembly can be increased by more than a factor of 50.
3491 
3492    Collective
3493 
3494    Input Parameters:
3495 +  B - the matrix
3496 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3497           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3498 .  nz - number of block nonzeros per block row (same for all rows)
3499 -  nnz - array containing the number of block nonzeros in the various block rows
3500          (possibly different for each block row) or NULL
3501 
3502    Options Database Keys:
3503 +   -mat_no_unroll - uses code that does not unroll the loops in the
3504                      block calculations (much slower)
3505 -   -mat_block_size - size of the blocks to use
3506 
3507    Level: intermediate
3508 
3509    Notes:
3510    If the nnz parameter is given then the nz parameter is ignored
3511 
3512    You can call MatGetInfo() to get information on how effective the preallocation was;
3513    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3514    You can also run with the option -info and look for messages with the string
3515    malloc in them to see if additional memory allocation was needed.
3516 
3517    The block AIJ format is fully compatible with standard Fortran 77
3518    storage.  That is, the stored row and column indices can begin at
3519    either one (as in Fortran) or zero.  See the users' manual for details.
3520 
3521    Specify the preallocated storage with either nz or nnz (not both).
3522    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3523    allocation.  See Users-Manual: ch_mat for details.
3524 
3525 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3526 @*/
3527 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3528 {
3529   PetscFunctionBegin;
3530   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3531   PetscValidType(B,1);
3532   PetscValidLogicalCollectiveInt(B,bs,2);
3533   PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));
3534   PetscFunctionReturn(0);
3535 }
3536 
3537 /*@C
3538    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3539 
3540    Collective
3541 
3542    Input Parameters:
3543 +  B - the matrix
3544 .  i - the indices into j for the start of each local row (starts with zero)
3545 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3546 -  v - optional values in the matrix
3547 
3548    Level: advanced
3549 
3550    Notes:
3551    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3552    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3553    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3554    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3555    block column and the second index is over columns within a block.
3556 
3557    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3558 
3559 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3560 @*/
3561 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3562 {
3563   PetscFunctionBegin;
3564   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3565   PetscValidType(B,1);
3566   PetscValidLogicalCollectiveInt(B,bs,2);
3567   PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
3568   PetscFunctionReturn(0);
3569 }
3570 
3571 /*@
3572      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3573 
3574      Collective
3575 
3576    Input Parameters:
3577 +  comm - must be an MPI communicator of size 1
3578 .  bs - size of block
3579 .  m - number of rows
3580 .  n - number of columns
3581 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3582 .  j - column indices
3583 -  a - matrix values
3584 
3585    Output Parameter:
3586 .  mat - the matrix
3587 
3588    Level: advanced
3589 
3590    Notes:
3591        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3592     once the matrix is destroyed
3593 
3594        You cannot set new nonzero locations into this matrix, that will generate an error.
3595 
3596        The i and j indices are 0 based
3597 
3598        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3599 
3600       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3601       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3602       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3603       with column-major ordering within blocks.
3604 
3605 .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3606 
3607 @*/
3608 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3609 {
3610   PetscInt       ii;
3611   Mat_SeqBAIJ    *baij;
3612 
3613   PetscFunctionBegin;
3614   PetscCheck(bs == 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %" PetscInt_FMT " > 1 is not supported yet",bs);
3615   if (m > 0) PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3616 
3617   PetscCall(MatCreate(comm,mat));
3618   PetscCall(MatSetSizes(*mat,m,n,m,n));
3619   PetscCall(MatSetType(*mat,MATSEQBAIJ));
3620   PetscCall(MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL));
3621   baij = (Mat_SeqBAIJ*)(*mat)->data;
3622   PetscCall(PetscMalloc2(m,&baij->imax,m,&baij->ilen));
3623   PetscCall(PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt)));
3624 
3625   baij->i = i;
3626   baij->j = j;
3627   baij->a = a;
3628 
3629   baij->singlemalloc = PETSC_FALSE;
3630   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3631   baij->free_a       = PETSC_FALSE;
3632   baij->free_ij      = PETSC_FALSE;
3633 
3634   for (ii=0; ii<m; ii++) {
3635     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3636     PetscCheck(i[ii+1] - i[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT,ii,i[ii+1] - i[ii]);
3637   }
3638   if (PetscDefined(USE_DEBUG)) {
3639     for (ii=0; ii<baij->i[m]; ii++) {
3640       PetscCheck(j[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]);
3641       PetscCheck(j[ii] <= n - 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]);
3642     }
3643   }
3644 
3645   PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
3646   PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
3647   PetscFunctionReturn(0);
3648 }
3649 
3650 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3651 {
3652   PetscFunctionBegin;
3653   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat));
3654   PetscFunctionReturn(0);
3655 }
3656