xref: /petsc/src/mat/impls/baij/seq/baij.c (revision badd099fb2ece77d080fc02aefe95d4a02e75697)
1 /*
2     Defines the basic matrix operations for the BAIJ (compressed row)
3   matrix storage format.
4 */
5 #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
6 #include <petscblaslapack.h>
7 #include <petsc/private/kernels/blockinvert.h>
8 #include <petsc/private/kernels/blockmatmult.h>
9 
10 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */
11 #define TYPE BAIJ
12 #define TYPE_BS
13 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
14 #undef TYPE_BS
15 #define TYPE_BS _BS
16 #define TYPE_BS_ON
17 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
18 #undef TYPE_BS
19 #include "../src/mat/impls/aij/seq/seqhashmat.h"
20 #undef TYPE
21 #undef TYPE_BS_ON
22 
23 #if defined(PETSC_HAVE_HYPRE)
24 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
25 #endif
26 
27 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
28 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
29 #endif
30 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
31 
32 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
33 {
34   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
35   PetscInt     m, n, ib, jb, bs = A->rmap->bs;
36   MatScalar   *a_val = a_aij->a;
37 
38   PetscFunctionBegin;
39   PetscCall(MatGetSize(A, &m, &n));
40   PetscCall(PetscArrayzero(reductions, n));
41   if (type == NORM_2) {
42     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
43       for (jb = 0; jb < bs; jb++) {
44         for (ib = 0; ib < bs; ib++) {
45           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
46           a_val++;
47         }
48       }
49     }
50   } else if (type == NORM_1) {
51     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
52       for (jb = 0; jb < bs; jb++) {
53         for (ib = 0; ib < bs; ib++) {
54           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
55           a_val++;
56         }
57       }
58     }
59   } else if (type == NORM_INFINITY) {
60     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
61       for (jb = 0; jb < bs; jb++) {
62         for (ib = 0; ib < bs; ib++) {
63           PetscInt col    = A->cmap->rstart + a_aij->j[i] * bs + jb;
64           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
65           a_val++;
66         }
67       }
68     }
69   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
70     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
71       for (jb = 0; jb < bs; jb++) {
72         for (ib = 0; ib < bs; ib++) {
73           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
74           a_val++;
75         }
76       }
77     }
78   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
79     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
80       for (jb = 0; jb < bs; jb++) {
81         for (ib = 0; ib < bs; ib++) {
82           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
83           a_val++;
84         }
85       }
86     }
87   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
88   if (type == NORM_2) {
89     for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
90   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
91     for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
92   }
93   PetscFunctionReturn(PETSC_SUCCESS);
94 }
95 
96 static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
97 {
98   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
99   PetscInt    *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
100   MatScalar   *v     = a->a, *odiag, *diag, work[25], *v_work;
101   PetscReal    shift = 0.0;
102   PetscBool    allowzeropivot, zeropivotdetected = PETSC_FALSE;
103 
104   PetscFunctionBegin;
105   allowzeropivot = PetscNot(A->erroriffailure);
106 
107   if (a->idiagvalid) {
108     if (values) *values = a->idiag;
109     PetscFunctionReturn(PETSC_SUCCESS);
110   }
111   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
112   diag_offset = a->diag;
113   if (!a->idiag) PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag));
114   diag = a->idiag;
115   if (values) *values = a->idiag;
116   /* factor and invert each block */
117   switch (bs) {
118   case 1:
119     for (i = 0; i < mbs; i++) {
120       odiag   = v + 1 * diag_offset[i];
121       diag[0] = odiag[0];
122 
123       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
124         PetscCheck(allowzeropivot, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
125         A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
126         A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
127         A->factorerror_zeropivot_row   = i;
128         PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
129       }
130 
131       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
132       diag += 1;
133     }
134     break;
135   case 2:
136     for (i = 0; i < mbs; i++) {
137       odiag   = v + 4 * diag_offset[i];
138       diag[0] = odiag[0];
139       diag[1] = odiag[1];
140       diag[2] = odiag[2];
141       diag[3] = odiag[3];
142       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
143       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
144       diag += 4;
145     }
146     break;
147   case 3:
148     for (i = 0; i < mbs; i++) {
149       odiag   = v + 9 * diag_offset[i];
150       diag[0] = odiag[0];
151       diag[1] = odiag[1];
152       diag[2] = odiag[2];
153       diag[3] = odiag[3];
154       diag[4] = odiag[4];
155       diag[5] = odiag[5];
156       diag[6] = odiag[6];
157       diag[7] = odiag[7];
158       diag[8] = odiag[8];
159       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
160       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
161       diag += 9;
162     }
163     break;
164   case 4:
165     for (i = 0; i < mbs; i++) {
166       odiag = v + 16 * diag_offset[i];
167       PetscCall(PetscArraycpy(diag, odiag, 16));
168       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
169       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
170       diag += 16;
171     }
172     break;
173   case 5:
174     for (i = 0; i < mbs; i++) {
175       odiag = v + 25 * diag_offset[i];
176       PetscCall(PetscArraycpy(diag, odiag, 25));
177       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
178       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
179       diag += 25;
180     }
181     break;
182   case 6:
183     for (i = 0; i < mbs; i++) {
184       odiag = v + 36 * diag_offset[i];
185       PetscCall(PetscArraycpy(diag, odiag, 36));
186       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
187       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
188       diag += 36;
189     }
190     break;
191   case 7:
192     for (i = 0; i < mbs; i++) {
193       odiag = v + 49 * diag_offset[i];
194       PetscCall(PetscArraycpy(diag, odiag, 49));
195       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
196       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
197       diag += 49;
198     }
199     break;
200   default:
201     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
202     for (i = 0; i < mbs; i++) {
203       odiag = v + bs2 * diag_offset[i];
204       PetscCall(PetscArraycpy(diag, odiag, bs2));
205       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
206       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
207       diag += bs2;
208     }
209     PetscCall(PetscFree2(v_work, v_pivots));
210   }
211   a->idiagvalid = PETSC_TRUE;
212   PetscFunctionReturn(PETSC_SUCCESS);
213 }
214 
215 static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
216 {
217   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
218   PetscScalar       *x, *work, *w, *workt, *t;
219   const MatScalar   *v, *aa = a->a, *idiag;
220   const PetscScalar *b, *xb;
221   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
222   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
223   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
224 
225   PetscFunctionBegin;
226   its = its * lits;
227   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
228   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
229   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
230   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
231   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
232 
233   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
234 
235   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
236   diag  = a->diag;
237   idiag = a->idiag;
238   k     = PetscMax(A->rmap->n, A->cmap->n);
239   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
240   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
241   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
242   work = a->mult_work;
243   t    = a->sor_workt;
244   w    = a->sor_work;
245 
246   PetscCall(VecGetArray(xx, &x));
247   PetscCall(VecGetArrayRead(bb, &b));
248 
249   if (flag & SOR_ZERO_INITIAL_GUESS) {
250     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
251       switch (bs) {
252       case 1:
253         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
254         t[0] = b[0];
255         i2   = 1;
256         idiag += 1;
257         for (i = 1; i < m; i++) {
258           v    = aa + ai[i];
259           vi   = aj + ai[i];
260           nz   = diag[i] - ai[i];
261           s[0] = b[i2];
262           for (j = 0; j < nz; j++) {
263             xw[0] = x[vi[j]];
264             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
265           }
266           t[i2] = s[0];
267           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
268           x[i2] = xw[0];
269           idiag += 1;
270           i2 += 1;
271         }
272         break;
273       case 2:
274         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
275         t[0] = b[0];
276         t[1] = b[1];
277         i2   = 2;
278         idiag += 4;
279         for (i = 1; i < m; i++) {
280           v    = aa + 4 * ai[i];
281           vi   = aj + ai[i];
282           nz   = diag[i] - ai[i];
283           s[0] = b[i2];
284           s[1] = b[i2 + 1];
285           for (j = 0; j < nz; j++) {
286             idx   = 2 * vi[j];
287             it    = 4 * j;
288             xw[0] = x[idx];
289             xw[1] = x[1 + idx];
290             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
291           }
292           t[i2]     = s[0];
293           t[i2 + 1] = s[1];
294           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
295           x[i2]     = xw[0];
296           x[i2 + 1] = xw[1];
297           idiag += 4;
298           i2 += 2;
299         }
300         break;
301       case 3:
302         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
303         t[0] = b[0];
304         t[1] = b[1];
305         t[2] = b[2];
306         i2   = 3;
307         idiag += 9;
308         for (i = 1; i < m; i++) {
309           v    = aa + 9 * ai[i];
310           vi   = aj + ai[i];
311           nz   = diag[i] - ai[i];
312           s[0] = b[i2];
313           s[1] = b[i2 + 1];
314           s[2] = b[i2 + 2];
315           while (nz--) {
316             idx   = 3 * (*vi++);
317             xw[0] = x[idx];
318             xw[1] = x[1 + idx];
319             xw[2] = x[2 + idx];
320             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
321             v += 9;
322           }
323           t[i2]     = s[0];
324           t[i2 + 1] = s[1];
325           t[i2 + 2] = s[2];
326           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
327           x[i2]     = xw[0];
328           x[i2 + 1] = xw[1];
329           x[i2 + 2] = xw[2];
330           idiag += 9;
331           i2 += 3;
332         }
333         break;
334       case 4:
335         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
336         t[0] = b[0];
337         t[1] = b[1];
338         t[2] = b[2];
339         t[3] = b[3];
340         i2   = 4;
341         idiag += 16;
342         for (i = 1; i < m; i++) {
343           v    = aa + 16 * ai[i];
344           vi   = aj + ai[i];
345           nz   = diag[i] - ai[i];
346           s[0] = b[i2];
347           s[1] = b[i2 + 1];
348           s[2] = b[i2 + 2];
349           s[3] = b[i2 + 3];
350           while (nz--) {
351             idx   = 4 * (*vi++);
352             xw[0] = x[idx];
353             xw[1] = x[1 + idx];
354             xw[2] = x[2 + idx];
355             xw[3] = x[3 + idx];
356             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
357             v += 16;
358           }
359           t[i2]     = s[0];
360           t[i2 + 1] = s[1];
361           t[i2 + 2] = s[2];
362           t[i2 + 3] = s[3];
363           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
364           x[i2]     = xw[0];
365           x[i2 + 1] = xw[1];
366           x[i2 + 2] = xw[2];
367           x[i2 + 3] = xw[3];
368           idiag += 16;
369           i2 += 4;
370         }
371         break;
372       case 5:
373         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
374         t[0] = b[0];
375         t[1] = b[1];
376         t[2] = b[2];
377         t[3] = b[3];
378         t[4] = b[4];
379         i2   = 5;
380         idiag += 25;
381         for (i = 1; i < m; i++) {
382           v    = aa + 25 * ai[i];
383           vi   = aj + ai[i];
384           nz   = diag[i] - ai[i];
385           s[0] = b[i2];
386           s[1] = b[i2 + 1];
387           s[2] = b[i2 + 2];
388           s[3] = b[i2 + 3];
389           s[4] = b[i2 + 4];
390           while (nz--) {
391             idx   = 5 * (*vi++);
392             xw[0] = x[idx];
393             xw[1] = x[1 + idx];
394             xw[2] = x[2 + idx];
395             xw[3] = x[3 + idx];
396             xw[4] = x[4 + idx];
397             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
398             v += 25;
399           }
400           t[i2]     = s[0];
401           t[i2 + 1] = s[1];
402           t[i2 + 2] = s[2];
403           t[i2 + 3] = s[3];
404           t[i2 + 4] = s[4];
405           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
406           x[i2]     = xw[0];
407           x[i2 + 1] = xw[1];
408           x[i2 + 2] = xw[2];
409           x[i2 + 3] = xw[3];
410           x[i2 + 4] = xw[4];
411           idiag += 25;
412           i2 += 5;
413         }
414         break;
415       case 6:
416         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
417         t[0] = b[0];
418         t[1] = b[1];
419         t[2] = b[2];
420         t[3] = b[3];
421         t[4] = b[4];
422         t[5] = b[5];
423         i2   = 6;
424         idiag += 36;
425         for (i = 1; i < m; i++) {
426           v    = aa + 36 * ai[i];
427           vi   = aj + ai[i];
428           nz   = diag[i] - ai[i];
429           s[0] = b[i2];
430           s[1] = b[i2 + 1];
431           s[2] = b[i2 + 2];
432           s[3] = b[i2 + 3];
433           s[4] = b[i2 + 4];
434           s[5] = b[i2 + 5];
435           while (nz--) {
436             idx   = 6 * (*vi++);
437             xw[0] = x[idx];
438             xw[1] = x[1 + idx];
439             xw[2] = x[2 + idx];
440             xw[3] = x[3 + idx];
441             xw[4] = x[4 + idx];
442             xw[5] = x[5 + idx];
443             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
444             v += 36;
445           }
446           t[i2]     = s[0];
447           t[i2 + 1] = s[1];
448           t[i2 + 2] = s[2];
449           t[i2 + 3] = s[3];
450           t[i2 + 4] = s[4];
451           t[i2 + 5] = s[5];
452           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
453           x[i2]     = xw[0];
454           x[i2 + 1] = xw[1];
455           x[i2 + 2] = xw[2];
456           x[i2 + 3] = xw[3];
457           x[i2 + 4] = xw[4];
458           x[i2 + 5] = xw[5];
459           idiag += 36;
460           i2 += 6;
461         }
462         break;
463       case 7:
464         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
465         t[0] = b[0];
466         t[1] = b[1];
467         t[2] = b[2];
468         t[3] = b[3];
469         t[4] = b[4];
470         t[5] = b[5];
471         t[6] = b[6];
472         i2   = 7;
473         idiag += 49;
474         for (i = 1; i < m; i++) {
475           v    = aa + 49 * ai[i];
476           vi   = aj + ai[i];
477           nz   = diag[i] - ai[i];
478           s[0] = b[i2];
479           s[1] = b[i2 + 1];
480           s[2] = b[i2 + 2];
481           s[3] = b[i2 + 3];
482           s[4] = b[i2 + 4];
483           s[5] = b[i2 + 5];
484           s[6] = b[i2 + 6];
485           while (nz--) {
486             idx   = 7 * (*vi++);
487             xw[0] = x[idx];
488             xw[1] = x[1 + idx];
489             xw[2] = x[2 + idx];
490             xw[3] = x[3 + idx];
491             xw[4] = x[4 + idx];
492             xw[5] = x[5 + idx];
493             xw[6] = x[6 + idx];
494             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
495             v += 49;
496           }
497           t[i2]     = s[0];
498           t[i2 + 1] = s[1];
499           t[i2 + 2] = s[2];
500           t[i2 + 3] = s[3];
501           t[i2 + 4] = s[4];
502           t[i2 + 5] = s[5];
503           t[i2 + 6] = s[6];
504           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
505           x[i2]     = xw[0];
506           x[i2 + 1] = xw[1];
507           x[i2 + 2] = xw[2];
508           x[i2 + 3] = xw[3];
509           x[i2 + 4] = xw[4];
510           x[i2 + 5] = xw[5];
511           x[i2 + 6] = xw[6];
512           idiag += 49;
513           i2 += 7;
514         }
515         break;
516       default:
517         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
518         PetscCall(PetscArraycpy(t, b, bs));
519         i2 = bs;
520         idiag += bs2;
521         for (i = 1; i < m; i++) {
522           v  = aa + bs2 * ai[i];
523           vi = aj + ai[i];
524           nz = diag[i] - ai[i];
525 
526           PetscCall(PetscArraycpy(w, b + i2, bs));
527           /* copy all rows of x that are needed into contiguous space */
528           workt = work;
529           for (j = 0; j < nz; j++) {
530             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
531             workt += bs;
532           }
533           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
534           PetscCall(PetscArraycpy(t + i2, w, bs));
535           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
536 
537           idiag += bs2;
538           i2 += bs;
539         }
540         break;
541       }
542       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
543       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
544       xb = t;
545     } else xb = b;
546     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
547       idiag = a->idiag + bs2 * (a->mbs - 1);
548       i2    = bs * (m - 1);
549       switch (bs) {
550       case 1:
551         s[0] = xb[i2];
552         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
553         x[i2] = xw[0];
554         i2 -= 1;
555         for (i = m - 2; i >= 0; i--) {
556           v    = aa + (diag[i] + 1);
557           vi   = aj + diag[i] + 1;
558           nz   = ai[i + 1] - diag[i] - 1;
559           s[0] = xb[i2];
560           for (j = 0; j < nz; j++) {
561             xw[0] = x[vi[j]];
562             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
563           }
564           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
565           x[i2] = xw[0];
566           idiag -= 1;
567           i2 -= 1;
568         }
569         break;
570       case 2:
571         s[0] = xb[i2];
572         s[1] = xb[i2 + 1];
573         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
574         x[i2]     = xw[0];
575         x[i2 + 1] = xw[1];
576         i2 -= 2;
577         idiag -= 4;
578         for (i = m - 2; i >= 0; i--) {
579           v    = aa + 4 * (diag[i] + 1);
580           vi   = aj + diag[i] + 1;
581           nz   = ai[i + 1] - diag[i] - 1;
582           s[0] = xb[i2];
583           s[1] = xb[i2 + 1];
584           for (j = 0; j < nz; j++) {
585             idx   = 2 * vi[j];
586             it    = 4 * j;
587             xw[0] = x[idx];
588             xw[1] = x[1 + idx];
589             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
590           }
591           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
592           x[i2]     = xw[0];
593           x[i2 + 1] = xw[1];
594           idiag -= 4;
595           i2 -= 2;
596         }
597         break;
598       case 3:
599         s[0] = xb[i2];
600         s[1] = xb[i2 + 1];
601         s[2] = xb[i2 + 2];
602         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
603         x[i2]     = xw[0];
604         x[i2 + 1] = xw[1];
605         x[i2 + 2] = xw[2];
606         i2 -= 3;
607         idiag -= 9;
608         for (i = m - 2; i >= 0; i--) {
609           v    = aa + 9 * (diag[i] + 1);
610           vi   = aj + diag[i] + 1;
611           nz   = ai[i + 1] - diag[i] - 1;
612           s[0] = xb[i2];
613           s[1] = xb[i2 + 1];
614           s[2] = xb[i2 + 2];
615           while (nz--) {
616             idx   = 3 * (*vi++);
617             xw[0] = x[idx];
618             xw[1] = x[1 + idx];
619             xw[2] = x[2 + idx];
620             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
621             v += 9;
622           }
623           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
624           x[i2]     = xw[0];
625           x[i2 + 1] = xw[1];
626           x[i2 + 2] = xw[2];
627           idiag -= 9;
628           i2 -= 3;
629         }
630         break;
631       case 4:
632         s[0] = xb[i2];
633         s[1] = xb[i2 + 1];
634         s[2] = xb[i2 + 2];
635         s[3] = xb[i2 + 3];
636         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
637         x[i2]     = xw[0];
638         x[i2 + 1] = xw[1];
639         x[i2 + 2] = xw[2];
640         x[i2 + 3] = xw[3];
641         i2 -= 4;
642         idiag -= 16;
643         for (i = m - 2; i >= 0; i--) {
644           v    = aa + 16 * (diag[i] + 1);
645           vi   = aj + diag[i] + 1;
646           nz   = ai[i + 1] - diag[i] - 1;
647           s[0] = xb[i2];
648           s[1] = xb[i2 + 1];
649           s[2] = xb[i2 + 2];
650           s[3] = xb[i2 + 3];
651           while (nz--) {
652             idx   = 4 * (*vi++);
653             xw[0] = x[idx];
654             xw[1] = x[1 + idx];
655             xw[2] = x[2 + idx];
656             xw[3] = x[3 + idx];
657             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
658             v += 16;
659           }
660           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
661           x[i2]     = xw[0];
662           x[i2 + 1] = xw[1];
663           x[i2 + 2] = xw[2];
664           x[i2 + 3] = xw[3];
665           idiag -= 16;
666           i2 -= 4;
667         }
668         break;
669       case 5:
670         s[0] = xb[i2];
671         s[1] = xb[i2 + 1];
672         s[2] = xb[i2 + 2];
673         s[3] = xb[i2 + 3];
674         s[4] = xb[i2 + 4];
675         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
676         x[i2]     = xw[0];
677         x[i2 + 1] = xw[1];
678         x[i2 + 2] = xw[2];
679         x[i2 + 3] = xw[3];
680         x[i2 + 4] = xw[4];
681         i2 -= 5;
682         idiag -= 25;
683         for (i = m - 2; i >= 0; i--) {
684           v    = aa + 25 * (diag[i] + 1);
685           vi   = aj + diag[i] + 1;
686           nz   = ai[i + 1] - diag[i] - 1;
687           s[0] = xb[i2];
688           s[1] = xb[i2 + 1];
689           s[2] = xb[i2 + 2];
690           s[3] = xb[i2 + 3];
691           s[4] = xb[i2 + 4];
692           while (nz--) {
693             idx   = 5 * (*vi++);
694             xw[0] = x[idx];
695             xw[1] = x[1 + idx];
696             xw[2] = x[2 + idx];
697             xw[3] = x[3 + idx];
698             xw[4] = x[4 + idx];
699             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
700             v += 25;
701           }
702           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
703           x[i2]     = xw[0];
704           x[i2 + 1] = xw[1];
705           x[i2 + 2] = xw[2];
706           x[i2 + 3] = xw[3];
707           x[i2 + 4] = xw[4];
708           idiag -= 25;
709           i2 -= 5;
710         }
711         break;
712       case 6:
713         s[0] = xb[i2];
714         s[1] = xb[i2 + 1];
715         s[2] = xb[i2 + 2];
716         s[3] = xb[i2 + 3];
717         s[4] = xb[i2 + 4];
718         s[5] = xb[i2 + 5];
719         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
720         x[i2]     = xw[0];
721         x[i2 + 1] = xw[1];
722         x[i2 + 2] = xw[2];
723         x[i2 + 3] = xw[3];
724         x[i2 + 4] = xw[4];
725         x[i2 + 5] = xw[5];
726         i2 -= 6;
727         idiag -= 36;
728         for (i = m - 2; i >= 0; i--) {
729           v    = aa + 36 * (diag[i] + 1);
730           vi   = aj + diag[i] + 1;
731           nz   = ai[i + 1] - diag[i] - 1;
732           s[0] = xb[i2];
733           s[1] = xb[i2 + 1];
734           s[2] = xb[i2 + 2];
735           s[3] = xb[i2 + 3];
736           s[4] = xb[i2 + 4];
737           s[5] = xb[i2 + 5];
738           while (nz--) {
739             idx   = 6 * (*vi++);
740             xw[0] = x[idx];
741             xw[1] = x[1 + idx];
742             xw[2] = x[2 + idx];
743             xw[3] = x[3 + idx];
744             xw[4] = x[4 + idx];
745             xw[5] = x[5 + idx];
746             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
747             v += 36;
748           }
749           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
750           x[i2]     = xw[0];
751           x[i2 + 1] = xw[1];
752           x[i2 + 2] = xw[2];
753           x[i2 + 3] = xw[3];
754           x[i2 + 4] = xw[4];
755           x[i2 + 5] = xw[5];
756           idiag -= 36;
757           i2 -= 6;
758         }
759         break;
760       case 7:
761         s[0] = xb[i2];
762         s[1] = xb[i2 + 1];
763         s[2] = xb[i2 + 2];
764         s[3] = xb[i2 + 3];
765         s[4] = xb[i2 + 4];
766         s[5] = xb[i2 + 5];
767         s[6] = xb[i2 + 6];
768         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
769         x[i2]     = xw[0];
770         x[i2 + 1] = xw[1];
771         x[i2 + 2] = xw[2];
772         x[i2 + 3] = xw[3];
773         x[i2 + 4] = xw[4];
774         x[i2 + 5] = xw[5];
775         x[i2 + 6] = xw[6];
776         i2 -= 7;
777         idiag -= 49;
778         for (i = m - 2; i >= 0; i--) {
779           v    = aa + 49 * (diag[i] + 1);
780           vi   = aj + diag[i] + 1;
781           nz   = ai[i + 1] - diag[i] - 1;
782           s[0] = xb[i2];
783           s[1] = xb[i2 + 1];
784           s[2] = xb[i2 + 2];
785           s[3] = xb[i2 + 3];
786           s[4] = xb[i2 + 4];
787           s[5] = xb[i2 + 5];
788           s[6] = xb[i2 + 6];
789           while (nz--) {
790             idx   = 7 * (*vi++);
791             xw[0] = x[idx];
792             xw[1] = x[1 + idx];
793             xw[2] = x[2 + idx];
794             xw[3] = x[3 + idx];
795             xw[4] = x[4 + idx];
796             xw[5] = x[5 + idx];
797             xw[6] = x[6 + idx];
798             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
799             v += 49;
800           }
801           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
802           x[i2]     = xw[0];
803           x[i2 + 1] = xw[1];
804           x[i2 + 2] = xw[2];
805           x[i2 + 3] = xw[3];
806           x[i2 + 4] = xw[4];
807           x[i2 + 5] = xw[5];
808           x[i2 + 6] = xw[6];
809           idiag -= 49;
810           i2 -= 7;
811         }
812         break;
813       default:
814         PetscCall(PetscArraycpy(w, xb + i2, bs));
815         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
816         i2 -= bs;
817         idiag -= bs2;
818         for (i = m - 2; i >= 0; i--) {
819           v  = aa + bs2 * (diag[i] + 1);
820           vi = aj + diag[i] + 1;
821           nz = ai[i + 1] - diag[i] - 1;
822 
823           PetscCall(PetscArraycpy(w, xb + i2, bs));
824           /* copy all rows of x that are needed into contiguous space */
825           workt = work;
826           for (j = 0; j < nz; j++) {
827             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
828             workt += bs;
829           }
830           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
831           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
832 
833           idiag -= bs2;
834           i2 -= bs;
835         }
836         break;
837       }
838       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
839     }
840     its--;
841   }
842   while (its--) {
843     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
844       idiag = a->idiag;
845       i2    = 0;
846       switch (bs) {
847       case 1:
848         for (i = 0; i < m; i++) {
849           v    = aa + ai[i];
850           vi   = aj + ai[i];
851           nz   = ai[i + 1] - ai[i];
852           s[0] = b[i2];
853           for (j = 0; j < nz; j++) {
854             xw[0] = x[vi[j]];
855             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
856           }
857           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
858           x[i2] += xw[0];
859           idiag += 1;
860           i2 += 1;
861         }
862         break;
863       case 2:
864         for (i = 0; i < m; i++) {
865           v    = aa + 4 * ai[i];
866           vi   = aj + ai[i];
867           nz   = ai[i + 1] - ai[i];
868           s[0] = b[i2];
869           s[1] = b[i2 + 1];
870           for (j = 0; j < nz; j++) {
871             idx   = 2 * vi[j];
872             it    = 4 * j;
873             xw[0] = x[idx];
874             xw[1] = x[1 + idx];
875             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
876           }
877           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
878           x[i2] += xw[0];
879           x[i2 + 1] += xw[1];
880           idiag += 4;
881           i2 += 2;
882         }
883         break;
884       case 3:
885         for (i = 0; i < m; i++) {
886           v    = aa + 9 * ai[i];
887           vi   = aj + ai[i];
888           nz   = ai[i + 1] - ai[i];
889           s[0] = b[i2];
890           s[1] = b[i2 + 1];
891           s[2] = b[i2 + 2];
892           while (nz--) {
893             idx   = 3 * (*vi++);
894             xw[0] = x[idx];
895             xw[1] = x[1 + idx];
896             xw[2] = x[2 + idx];
897             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
898             v += 9;
899           }
900           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
901           x[i2] += xw[0];
902           x[i2 + 1] += xw[1];
903           x[i2 + 2] += xw[2];
904           idiag += 9;
905           i2 += 3;
906         }
907         break;
908       case 4:
909         for (i = 0; i < m; i++) {
910           v    = aa + 16 * ai[i];
911           vi   = aj + ai[i];
912           nz   = ai[i + 1] - ai[i];
913           s[0] = b[i2];
914           s[1] = b[i2 + 1];
915           s[2] = b[i2 + 2];
916           s[3] = b[i2 + 3];
917           while (nz--) {
918             idx   = 4 * (*vi++);
919             xw[0] = x[idx];
920             xw[1] = x[1 + idx];
921             xw[2] = x[2 + idx];
922             xw[3] = x[3 + idx];
923             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
924             v += 16;
925           }
926           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
927           x[i2] += xw[0];
928           x[i2 + 1] += xw[1];
929           x[i2 + 2] += xw[2];
930           x[i2 + 3] += xw[3];
931           idiag += 16;
932           i2 += 4;
933         }
934         break;
935       case 5:
936         for (i = 0; i < m; i++) {
937           v    = aa + 25 * ai[i];
938           vi   = aj + ai[i];
939           nz   = ai[i + 1] - ai[i];
940           s[0] = b[i2];
941           s[1] = b[i2 + 1];
942           s[2] = b[i2 + 2];
943           s[3] = b[i2 + 3];
944           s[4] = b[i2 + 4];
945           while (nz--) {
946             idx   = 5 * (*vi++);
947             xw[0] = x[idx];
948             xw[1] = x[1 + idx];
949             xw[2] = x[2 + idx];
950             xw[3] = x[3 + idx];
951             xw[4] = x[4 + idx];
952             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
953             v += 25;
954           }
955           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
956           x[i2] += xw[0];
957           x[i2 + 1] += xw[1];
958           x[i2 + 2] += xw[2];
959           x[i2 + 3] += xw[3];
960           x[i2 + 4] += xw[4];
961           idiag += 25;
962           i2 += 5;
963         }
964         break;
965       case 6:
966         for (i = 0; i < m; i++) {
967           v    = aa + 36 * ai[i];
968           vi   = aj + ai[i];
969           nz   = ai[i + 1] - ai[i];
970           s[0] = b[i2];
971           s[1] = b[i2 + 1];
972           s[2] = b[i2 + 2];
973           s[3] = b[i2 + 3];
974           s[4] = b[i2 + 4];
975           s[5] = b[i2 + 5];
976           while (nz--) {
977             idx   = 6 * (*vi++);
978             xw[0] = x[idx];
979             xw[1] = x[1 + idx];
980             xw[2] = x[2 + idx];
981             xw[3] = x[3 + idx];
982             xw[4] = x[4 + idx];
983             xw[5] = x[5 + idx];
984             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
985             v += 36;
986           }
987           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
988           x[i2] += xw[0];
989           x[i2 + 1] += xw[1];
990           x[i2 + 2] += xw[2];
991           x[i2 + 3] += xw[3];
992           x[i2 + 4] += xw[4];
993           x[i2 + 5] += xw[5];
994           idiag += 36;
995           i2 += 6;
996         }
997         break;
998       case 7:
999         for (i = 0; i < m; i++) {
1000           v    = aa + 49 * ai[i];
1001           vi   = aj + ai[i];
1002           nz   = ai[i + 1] - ai[i];
1003           s[0] = b[i2];
1004           s[1] = b[i2 + 1];
1005           s[2] = b[i2 + 2];
1006           s[3] = b[i2 + 3];
1007           s[4] = b[i2 + 4];
1008           s[5] = b[i2 + 5];
1009           s[6] = b[i2 + 6];
1010           while (nz--) {
1011             idx   = 7 * (*vi++);
1012             xw[0] = x[idx];
1013             xw[1] = x[1 + idx];
1014             xw[2] = x[2 + idx];
1015             xw[3] = x[3 + idx];
1016             xw[4] = x[4 + idx];
1017             xw[5] = x[5 + idx];
1018             xw[6] = x[6 + idx];
1019             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1020             v += 49;
1021           }
1022           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1023           x[i2] += xw[0];
1024           x[i2 + 1] += xw[1];
1025           x[i2 + 2] += xw[2];
1026           x[i2 + 3] += xw[3];
1027           x[i2 + 4] += xw[4];
1028           x[i2 + 5] += xw[5];
1029           x[i2 + 6] += xw[6];
1030           idiag += 49;
1031           i2 += 7;
1032         }
1033         break;
1034       default:
1035         for (i = 0; i < m; i++) {
1036           v  = aa + bs2 * ai[i];
1037           vi = aj + ai[i];
1038           nz = ai[i + 1] - ai[i];
1039 
1040           PetscCall(PetscArraycpy(w, b + i2, bs));
1041           /* copy all rows of x that are needed into contiguous space */
1042           workt = work;
1043           for (j = 0; j < nz; j++) {
1044             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1045             workt += bs;
1046           }
1047           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1048           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1049 
1050           idiag += bs2;
1051           i2 += bs;
1052         }
1053         break;
1054       }
1055       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1056     }
1057     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1058       idiag = a->idiag + bs2 * (a->mbs - 1);
1059       i2    = bs * (m - 1);
1060       switch (bs) {
1061       case 1:
1062         for (i = m - 1; i >= 0; i--) {
1063           v    = aa + ai[i];
1064           vi   = aj + ai[i];
1065           nz   = ai[i + 1] - ai[i];
1066           s[0] = b[i2];
1067           for (j = 0; j < nz; j++) {
1068             xw[0] = x[vi[j]];
1069             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1070           }
1071           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1072           x[i2] += xw[0];
1073           idiag -= 1;
1074           i2 -= 1;
1075         }
1076         break;
1077       case 2:
1078         for (i = m - 1; i >= 0; i--) {
1079           v    = aa + 4 * ai[i];
1080           vi   = aj + ai[i];
1081           nz   = ai[i + 1] - ai[i];
1082           s[0] = b[i2];
1083           s[1] = b[i2 + 1];
1084           for (j = 0; j < nz; j++) {
1085             idx   = 2 * vi[j];
1086             it    = 4 * j;
1087             xw[0] = x[idx];
1088             xw[1] = x[1 + idx];
1089             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1090           }
1091           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
1092           x[i2] += xw[0];
1093           x[i2 + 1] += xw[1];
1094           idiag -= 4;
1095           i2 -= 2;
1096         }
1097         break;
1098       case 3:
1099         for (i = m - 1; i >= 0; i--) {
1100           v    = aa + 9 * ai[i];
1101           vi   = aj + ai[i];
1102           nz   = ai[i + 1] - ai[i];
1103           s[0] = b[i2];
1104           s[1] = b[i2 + 1];
1105           s[2] = b[i2 + 2];
1106           while (nz--) {
1107             idx   = 3 * (*vi++);
1108             xw[0] = x[idx];
1109             xw[1] = x[1 + idx];
1110             xw[2] = x[2 + idx];
1111             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1112             v += 9;
1113           }
1114           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
1115           x[i2] += xw[0];
1116           x[i2 + 1] += xw[1];
1117           x[i2 + 2] += xw[2];
1118           idiag -= 9;
1119           i2 -= 3;
1120         }
1121         break;
1122       case 4:
1123         for (i = m - 1; i >= 0; i--) {
1124           v    = aa + 16 * ai[i];
1125           vi   = aj + ai[i];
1126           nz   = ai[i + 1] - ai[i];
1127           s[0] = b[i2];
1128           s[1] = b[i2 + 1];
1129           s[2] = b[i2 + 2];
1130           s[3] = b[i2 + 3];
1131           while (nz--) {
1132             idx   = 4 * (*vi++);
1133             xw[0] = x[idx];
1134             xw[1] = x[1 + idx];
1135             xw[2] = x[2 + idx];
1136             xw[3] = x[3 + idx];
1137             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1138             v += 16;
1139           }
1140           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
1141           x[i2] += xw[0];
1142           x[i2 + 1] += xw[1];
1143           x[i2 + 2] += xw[2];
1144           x[i2 + 3] += xw[3];
1145           idiag -= 16;
1146           i2 -= 4;
1147         }
1148         break;
1149       case 5:
1150         for (i = m - 1; i >= 0; i--) {
1151           v    = aa + 25 * ai[i];
1152           vi   = aj + ai[i];
1153           nz   = ai[i + 1] - ai[i];
1154           s[0] = b[i2];
1155           s[1] = b[i2 + 1];
1156           s[2] = b[i2 + 2];
1157           s[3] = b[i2 + 3];
1158           s[4] = b[i2 + 4];
1159           while (nz--) {
1160             idx   = 5 * (*vi++);
1161             xw[0] = x[idx];
1162             xw[1] = x[1 + idx];
1163             xw[2] = x[2 + idx];
1164             xw[3] = x[3 + idx];
1165             xw[4] = x[4 + idx];
1166             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1167             v += 25;
1168           }
1169           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
1170           x[i2] += xw[0];
1171           x[i2 + 1] += xw[1];
1172           x[i2 + 2] += xw[2];
1173           x[i2 + 3] += xw[3];
1174           x[i2 + 4] += xw[4];
1175           idiag -= 25;
1176           i2 -= 5;
1177         }
1178         break;
1179       case 6:
1180         for (i = m - 1; i >= 0; i--) {
1181           v    = aa + 36 * ai[i];
1182           vi   = aj + ai[i];
1183           nz   = ai[i + 1] - ai[i];
1184           s[0] = b[i2];
1185           s[1] = b[i2 + 1];
1186           s[2] = b[i2 + 2];
1187           s[3] = b[i2 + 3];
1188           s[4] = b[i2 + 4];
1189           s[5] = b[i2 + 5];
1190           while (nz--) {
1191             idx   = 6 * (*vi++);
1192             xw[0] = x[idx];
1193             xw[1] = x[1 + idx];
1194             xw[2] = x[2 + idx];
1195             xw[3] = x[3 + idx];
1196             xw[4] = x[4 + idx];
1197             xw[5] = x[5 + idx];
1198             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1199             v += 36;
1200           }
1201           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
1202           x[i2] += xw[0];
1203           x[i2 + 1] += xw[1];
1204           x[i2 + 2] += xw[2];
1205           x[i2 + 3] += xw[3];
1206           x[i2 + 4] += xw[4];
1207           x[i2 + 5] += xw[5];
1208           idiag -= 36;
1209           i2 -= 6;
1210         }
1211         break;
1212       case 7:
1213         for (i = m - 1; i >= 0; i--) {
1214           v    = aa + 49 * ai[i];
1215           vi   = aj + ai[i];
1216           nz   = ai[i + 1] - ai[i];
1217           s[0] = b[i2];
1218           s[1] = b[i2 + 1];
1219           s[2] = b[i2 + 2];
1220           s[3] = b[i2 + 3];
1221           s[4] = b[i2 + 4];
1222           s[5] = b[i2 + 5];
1223           s[6] = b[i2 + 6];
1224           while (nz--) {
1225             idx   = 7 * (*vi++);
1226             xw[0] = x[idx];
1227             xw[1] = x[1 + idx];
1228             xw[2] = x[2 + idx];
1229             xw[3] = x[3 + idx];
1230             xw[4] = x[4 + idx];
1231             xw[5] = x[5 + idx];
1232             xw[6] = x[6 + idx];
1233             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1234             v += 49;
1235           }
1236           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1237           x[i2] += xw[0];
1238           x[i2 + 1] += xw[1];
1239           x[i2 + 2] += xw[2];
1240           x[i2 + 3] += xw[3];
1241           x[i2 + 4] += xw[4];
1242           x[i2 + 5] += xw[5];
1243           x[i2 + 6] += xw[6];
1244           idiag -= 49;
1245           i2 -= 7;
1246         }
1247         break;
1248       default:
1249         for (i = m - 1; i >= 0; i--) {
1250           v  = aa + bs2 * ai[i];
1251           vi = aj + ai[i];
1252           nz = ai[i + 1] - ai[i];
1253 
1254           PetscCall(PetscArraycpy(w, b + i2, bs));
1255           /* copy all rows of x that are needed into contiguous space */
1256           workt = work;
1257           for (j = 0; j < nz; j++) {
1258             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1259             workt += bs;
1260           }
1261           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1262           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1263 
1264           idiag -= bs2;
1265           i2 -= bs;
1266         }
1267         break;
1268       }
1269       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1270     }
1271   }
1272   PetscCall(VecRestoreArray(xx, &x));
1273   PetscCall(VecRestoreArrayRead(bb, &b));
1274   PetscFunctionReturn(PETSC_SUCCESS);
1275 }
1276 
1277 /*
1278     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1279 */
1280 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1281   #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1282 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1283   #define matsetvaluesblocked4_ matsetvaluesblocked4
1284 #endif
1285 
1286 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1287 {
1288   Mat                A = *AA;
1289   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1290   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1291   PetscInt          *ai = a->i, *ailen = a->ilen;
1292   PetscInt          *aj = a->j, stepval, lastcol = -1;
1293   const PetscScalar *value = v;
1294   MatScalar         *ap, *aa = a->a, *bap;
1295 
1296   PetscFunctionBegin;
1297   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1298   stepval = (n - 1) * 4;
1299   for (k = 0; k < m; k++) { /* loop over added rows */
1300     row  = im[k];
1301     rp   = aj + ai[row];
1302     ap   = aa + 16 * ai[row];
1303     nrow = ailen[row];
1304     low  = 0;
1305     high = nrow;
1306     for (l = 0; l < n; l++) { /* loop over added columns */
1307       col = in[l];
1308       if (col <= lastcol) low = 0;
1309       else high = nrow;
1310       lastcol = col;
1311       value   = v + k * (stepval + 4 + l) * 4;
1312       while (high - low > 7) {
1313         t = (low + high) / 2;
1314         if (rp[t] > col) high = t;
1315         else low = t;
1316       }
1317       for (i = low; i < high; i++) {
1318         if (rp[i] > col) break;
1319         if (rp[i] == col) {
1320           bap = ap + 16 * i;
1321           for (ii = 0; ii < 4; ii++, value += stepval) {
1322             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1323           }
1324           goto noinsert2;
1325         }
1326       }
1327       N = nrow++ - 1;
1328       high++; /* added new column index thus must search to one higher than before */
1329       /* shift up all the later entries in this row */
1330       for (ii = N; ii >= i; ii--) {
1331         rp[ii + 1] = rp[ii];
1332         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1333       }
1334       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1335       rp[i] = col;
1336       bap   = ap + 16 * i;
1337       for (ii = 0; ii < 4; ii++, value += stepval) {
1338         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1339       }
1340     noinsert2:;
1341       low = i;
1342     }
1343     ailen[row] = nrow;
1344   }
1345   PetscFunctionReturnVoid();
1346 }
1347 
1348 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1349   #define matsetvalues4_ MATSETVALUES4
1350 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1351   #define matsetvalues4_ matsetvalues4
1352 #endif
1353 
1354 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1355 {
1356   Mat          A = *AA;
1357   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1358   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1359   PetscInt    *ai = a->i, *ailen = a->ilen;
1360   PetscInt    *aj = a->j, brow, bcol;
1361   PetscInt     ridx, cidx, lastcol = -1;
1362   MatScalar   *ap, value, *aa      = a->a, *bap;
1363 
1364   PetscFunctionBegin;
1365   for (k = 0; k < m; k++) { /* loop over added rows */
1366     row  = im[k];
1367     brow = row / 4;
1368     rp   = aj + ai[brow];
1369     ap   = aa + 16 * ai[brow];
1370     nrow = ailen[brow];
1371     low  = 0;
1372     high = nrow;
1373     for (l = 0; l < n; l++) { /* loop over added columns */
1374       col   = in[l];
1375       bcol  = col / 4;
1376       ridx  = row % 4;
1377       cidx  = col % 4;
1378       value = v[l + k * n];
1379       if (col <= lastcol) low = 0;
1380       else high = nrow;
1381       lastcol = col;
1382       while (high - low > 7) {
1383         t = (low + high) / 2;
1384         if (rp[t] > bcol) high = t;
1385         else low = t;
1386       }
1387       for (i = low; i < high; i++) {
1388         if (rp[i] > bcol) break;
1389         if (rp[i] == bcol) {
1390           bap = ap + 16 * i + 4 * cidx + ridx;
1391           *bap += value;
1392           goto noinsert1;
1393         }
1394       }
1395       N = nrow++ - 1;
1396       high++; /* added new column thus must search to one higher than before */
1397       /* shift up all the later entries in this row */
1398       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
1399       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
1400       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1401       rp[i]                        = bcol;
1402       ap[16 * i + 4 * cidx + ridx] = value;
1403     noinsert1:;
1404       low = i;
1405     }
1406     ailen[brow] = nrow;
1407   }
1408   PetscFunctionReturnVoid();
1409 }
1410 
1411 /*
1412      Checks for missing diagonals
1413 */
1414 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1415 {
1416   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1417   PetscInt    *diag, *ii = a->i, i;
1418 
1419   PetscFunctionBegin;
1420   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
1421   *missing = PETSC_FALSE;
1422   if (A->rmap->n > 0 && !ii) {
1423     *missing = PETSC_TRUE;
1424     if (d) *d = 0;
1425     PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
1426   } else {
1427     PetscInt n;
1428     n    = PetscMin(a->mbs, a->nbs);
1429     diag = a->diag;
1430     for (i = 0; i < n; i++) {
1431       if (diag[i] >= ii[i + 1]) {
1432         *missing = PETSC_TRUE;
1433         if (d) *d = i;
1434         PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i));
1435         break;
1436       }
1437     }
1438   }
1439   PetscFunctionReturn(PETSC_SUCCESS);
1440 }
1441 
1442 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1443 {
1444   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1445   PetscInt     i, j, m = a->mbs;
1446 
1447   PetscFunctionBegin;
1448   if (!a->diag) {
1449     PetscCall(PetscMalloc1(m, &a->diag));
1450     a->free_diag = PETSC_TRUE;
1451   }
1452   for (i = 0; i < m; i++) {
1453     a->diag[i] = a->i[i + 1];
1454     for (j = a->i[i]; j < a->i[i + 1]; j++) {
1455       if (a->j[j] == i) {
1456         a->diag[i] = j;
1457         break;
1458       }
1459     }
1460   }
1461   PetscFunctionReturn(PETSC_SUCCESS);
1462 }
1463 
1464 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1465 {
1466   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1467   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
1468   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
1469 
1470   PetscFunctionBegin;
1471   *nn = n;
1472   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1473   if (symmetric) {
1474     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1475     nz = tia[n];
1476   } else {
1477     tia = a->i;
1478     tja = a->j;
1479   }
1480 
1481   if (!blockcompressed && bs > 1) {
1482     (*nn) *= bs;
1483     /* malloc & create the natural set of indices */
1484     PetscCall(PetscMalloc1((n + 1) * bs, ia));
1485     if (n) {
1486       (*ia)[0] = oshift;
1487       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
1488     }
1489 
1490     for (i = 1; i < n; i++) {
1491       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1492       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
1493     }
1494     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1495 
1496     if (inja) {
1497       PetscCall(PetscMalloc1(nz * bs * bs, ja));
1498       cnt = 0;
1499       for (i = 0; i < n; i++) {
1500         for (j = 0; j < bs; j++) {
1501           for (k = tia[i]; k < tia[i + 1]; k++) {
1502             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
1503           }
1504         }
1505       }
1506     }
1507 
1508     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1509       PetscCall(PetscFree(tia));
1510       PetscCall(PetscFree(tja));
1511     }
1512   } else if (oshift == 1) {
1513     if (symmetric) {
1514       nz = tia[A->rmap->n / bs];
1515       /*  add 1 to i and j indices */
1516       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1517       *ia = tia;
1518       if (ja) {
1519         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1520         *ja = tja;
1521       }
1522     } else {
1523       nz = a->i[A->rmap->n / bs];
1524       /* malloc space and  add 1 to i and j indices */
1525       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1526       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1527       if (ja) {
1528         PetscCall(PetscMalloc1(nz, ja));
1529         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1530       }
1531     }
1532   } else {
1533     *ia = tia;
1534     if (ja) *ja = tja;
1535   }
1536   PetscFunctionReturn(PETSC_SUCCESS);
1537 }
1538 
1539 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1540 {
1541   PetscFunctionBegin;
1542   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1543   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1544     PetscCall(PetscFree(*ia));
1545     if (ja) PetscCall(PetscFree(*ja));
1546   }
1547   PetscFunctionReturn(PETSC_SUCCESS);
1548 }
1549 
1550 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1551 {
1552   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1553 
1554   PetscFunctionBegin;
1555   if (A->hash_active) {
1556     PetscInt bs;
1557     A->ops[0] = a->cops;
1558     PetscCall(PetscHMapIJVDestroy(&a->ht));
1559     PetscCall(MatGetBlockSize(A, &bs));
1560     if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht));
1561     PetscCall(PetscFree(a->dnz));
1562     PetscCall(PetscFree(a->bdnz));
1563     A->hash_active = PETSC_FALSE;
1564   }
1565   PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
1566   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
1567   PetscCall(ISDestroy(&a->row));
1568   PetscCall(ISDestroy(&a->col));
1569   if (a->free_diag) PetscCall(PetscFree(a->diag));
1570   PetscCall(PetscFree(a->idiag));
1571   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
1572   PetscCall(PetscFree(a->solve_work));
1573   PetscCall(PetscFree(a->mult_work));
1574   PetscCall(PetscFree(a->sor_workt));
1575   PetscCall(PetscFree(a->sor_work));
1576   PetscCall(ISDestroy(&a->icol));
1577   PetscCall(PetscFree(a->saved_values));
1578   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1579 
1580   PetscCall(MatDestroy(&a->sbaijMat));
1581   PetscCall(MatDestroy(&a->parent));
1582   PetscCall(PetscFree(A->data));
1583 
1584   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
1585   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
1586   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
1587   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
1588   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
1589   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
1590   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
1591   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
1592   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
1593   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
1594   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
1595   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
1596 #if defined(PETSC_HAVE_HYPRE)
1597   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
1598 #endif
1599   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
1600   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1601   PetscFunctionReturn(PETSC_SUCCESS);
1602 }
1603 
1604 static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1605 {
1606   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1607 
1608   PetscFunctionBegin;
1609   switch (op) {
1610   case MAT_ROW_ORIENTED:
1611     a->roworiented = flg;
1612     break;
1613   case MAT_KEEP_NONZERO_PATTERN:
1614     a->keepnonzeropattern = flg;
1615     break;
1616   case MAT_NEW_NONZERO_LOCATIONS:
1617     a->nonew = (flg ? 0 : 1);
1618     break;
1619   case MAT_NEW_NONZERO_LOCATION_ERR:
1620     a->nonew = (flg ? -1 : 0);
1621     break;
1622   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1623     a->nonew = (flg ? -2 : 0);
1624     break;
1625   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1626     a->nounused = (flg ? -1 : 0);
1627     break;
1628   default:
1629     break;
1630   }
1631   PetscFunctionReturn(PETSC_SUCCESS);
1632 }
1633 
1634 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1635 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1636 {
1637   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
1638   MatScalar   *aa_i;
1639   PetscScalar *v_i;
1640 
1641   PetscFunctionBegin;
1642   bs  = A->rmap->bs;
1643   bs2 = bs * bs;
1644   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1645 
1646   bn  = row / bs; /* Block number */
1647   bp  = row % bs; /* Block Position */
1648   M   = ai[bn + 1] - ai[bn];
1649   *nz = bs * M;
1650 
1651   if (v) {
1652     *v = NULL;
1653     if (*nz) {
1654       PetscCall(PetscMalloc1(*nz, v));
1655       for (i = 0; i < M; i++) { /* for each block in the block row */
1656         v_i  = *v + i * bs;
1657         aa_i = aa + bs2 * (ai[bn] + i);
1658         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
1659       }
1660     }
1661   }
1662 
1663   if (idx) {
1664     *idx = NULL;
1665     if (*nz) {
1666       PetscCall(PetscMalloc1(*nz, idx));
1667       for (i = 0; i < M; i++) { /* for each block in the block row */
1668         idx_i = *idx + i * bs;
1669         itmp  = bs * aj[ai[bn] + i];
1670         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
1671       }
1672     }
1673   }
1674   PetscFunctionReturn(PETSC_SUCCESS);
1675 }
1676 
1677 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1678 {
1679   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1680 
1681   PetscFunctionBegin;
1682   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
1683   PetscFunctionReturn(PETSC_SUCCESS);
1684 }
1685 
1686 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1687 {
1688   PetscFunctionBegin;
1689   if (idx) PetscCall(PetscFree(*idx));
1690   if (v) PetscCall(PetscFree(*v));
1691   PetscFunctionReturn(PETSC_SUCCESS);
1692 }
1693 
1694 static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1695 {
1696   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
1697   Mat          C;
1698   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
1699   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
1700   MatScalar   *ata, *aa = a->a;
1701 
1702   PetscFunctionBegin;
1703   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1704   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1705   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1706     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1707 
1708     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
1709     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
1710     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
1711     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
1712 
1713     at  = (Mat_SeqBAIJ *)C->data;
1714     ati = at->i;
1715     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1716   } else {
1717     C   = *B;
1718     at  = (Mat_SeqBAIJ *)C->data;
1719     ati = at->i;
1720   }
1721 
1722   atj = at->j;
1723   ata = at->a;
1724 
1725   /* Copy ati into atfill so we have locations of the next free space in atj */
1726   PetscCall(PetscArraycpy(atfill, ati, nbs));
1727 
1728   /* Walk through A row-wise and mark nonzero entries of A^T. */
1729   for (i = 0; i < mbs; i++) {
1730     anzj = ai[i + 1] - ai[i];
1731     for (j = 0; j < anzj; j++) {
1732       atj[atfill[*aj]] = i;
1733       for (kr = 0; kr < bs; kr++) {
1734         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
1735       }
1736       atfill[*aj++] += 1;
1737     }
1738   }
1739   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1740   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1741 
1742   /* Clean up temporary space and complete requests. */
1743   PetscCall(PetscFree(atfill));
1744 
1745   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1746     PetscCall(MatSetBlockSizes(C, A->cmap->bs, A->rmap->bs));
1747     *B = C;
1748   } else {
1749     PetscCall(MatHeaderMerge(A, &C));
1750   }
1751   PetscFunctionReturn(PETSC_SUCCESS);
1752 }
1753 
1754 static PetscErrorCode MatCompare_SeqBAIJ_Private(Mat A, Mat B, PetscReal tol, PetscBool *flg)
1755 {
1756   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1757 
1758   PetscFunctionBegin;
1759   /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */
1760   if (A->rmap->N != B->rmap->N || A->cmap->n != B->cmap->n || A->rmap->bs != B->rmap->bs || a->nz != b->nz) {
1761     *flg = PETSC_FALSE;
1762     PetscFunctionReturn(PETSC_SUCCESS);
1763   }
1764 
1765   /* if the a->i are the same */
1766   PetscCall(PetscArraycmp(a->i, b->i, a->mbs + 1, flg));
1767   if (!*flg) PetscFunctionReturn(PETSC_SUCCESS);
1768 
1769   /* if a->j are the same */
1770   PetscCall(PetscArraycmp(a->j, b->j, a->nz, flg));
1771   if (!*flg) PetscFunctionReturn(PETSC_SUCCESS);
1772 
1773   if (tol == 0.0) PetscCall(PetscArraycmp(a->a, b->a, a->nz * A->rmap->bs * A->rmap->bs, flg)); /* if a->a are the same */
1774   else {
1775     *flg = PETSC_TRUE;
1776     for (PetscInt i = 0; (i < a->nz * A->rmap->bs * A->rmap->bs) && *flg; ++i)
1777       if (PetscAbsScalar(a->a[i] - b->a[i]) > tol) *flg = PETSC_FALSE;
1778   }
1779   PetscFunctionReturn(PETSC_SUCCESS);
1780 }
1781 
1782 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1783 {
1784   Mat Btrans;
1785 
1786   PetscFunctionBegin;
1787   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
1788   PetscCall(MatCompare_SeqBAIJ_Private(A, Btrans, tol, f));
1789   PetscCall(MatDestroy(&Btrans));
1790   PetscFunctionReturn(PETSC_SUCCESS);
1791 }
1792 
1793 static PetscErrorCode MatEqual_SeqBAIJ(Mat A, Mat B, PetscBool *flg)
1794 {
1795   PetscFunctionBegin;
1796   PetscCall(MatCompare_SeqBAIJ_Private(A, B, 0.0, flg));
1797   PetscFunctionReturn(PETSC_SUCCESS);
1798 }
1799 
1800 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1801 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1802 {
1803   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1804   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1805   PetscInt    *rowlens, *colidxs;
1806   PetscScalar *matvals;
1807 
1808   PetscFunctionBegin;
1809   PetscCall(PetscViewerSetUp(viewer));
1810 
1811   M  = mat->rmap->N;
1812   N  = mat->cmap->N;
1813   m  = mat->rmap->n;
1814   bs = mat->rmap->bs;
1815   nz = bs * bs * A->nz;
1816 
1817   /* write matrix header */
1818   header[0] = MAT_FILE_CLASSID;
1819   header[1] = M;
1820   header[2] = N;
1821   header[3] = nz;
1822   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1823 
1824   /* store row lengths */
1825   PetscCall(PetscMalloc1(m, &rowlens));
1826   for (cnt = 0, i = 0; i < A->mbs; i++)
1827     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
1828   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
1829   PetscCall(PetscFree(rowlens));
1830 
1831   /* store column indices  */
1832   PetscCall(PetscMalloc1(nz, &colidxs));
1833   for (cnt = 0, i = 0; i < A->mbs; i++)
1834     for (k = 0; k < bs; k++)
1835       for (j = A->i[i]; j < A->i[i + 1]; j++)
1836         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
1837   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1838   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
1839   PetscCall(PetscFree(colidxs));
1840 
1841   /* store nonzero values */
1842   PetscCall(PetscMalloc1(nz, &matvals));
1843   for (cnt = 0, i = 0; i < A->mbs; i++)
1844     for (k = 0; k < bs; k++)
1845       for (j = A->i[i]; j < A->i[i + 1]; j++)
1846         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
1847   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1848   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
1849   PetscCall(PetscFree(matvals));
1850 
1851   /* write block size option to the viewer's .info file */
1852   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1853   PetscFunctionReturn(PETSC_SUCCESS);
1854 }
1855 
1856 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1857 {
1858   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1859   PetscInt     i, bs = A->rmap->bs, k;
1860 
1861   PetscFunctionBegin;
1862   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1863   for (i = 0; i < a->mbs; i++) {
1864     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
1865     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
1866     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1867   }
1868   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1869   PetscFunctionReturn(PETSC_SUCCESS);
1870 }
1871 
1872 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1873 {
1874   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1875   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1876   PetscViewerFormat format;
1877 
1878   PetscFunctionBegin;
1879   if (A->structure_only) {
1880     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
1881     PetscFunctionReturn(PETSC_SUCCESS);
1882   }
1883 
1884   PetscCall(PetscViewerGetFormat(viewer, &format));
1885   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1886     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1887   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1888     const char *matname;
1889     Mat         aij;
1890     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
1891     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
1892     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
1893     PetscCall(MatView(aij, viewer));
1894     PetscCall(MatDestroy(&aij));
1895   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1896     PetscFunctionReturn(PETSC_SUCCESS);
1897   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1898     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1899     for (i = 0; i < a->mbs; i++) {
1900       for (j = 0; j < bs; j++) {
1901         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1902         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1903           for (l = 0; l < bs; l++) {
1904 #if defined(PETSC_USE_COMPLEX)
1905             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1906               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1907             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1908               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1909             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1910               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1911             }
1912 #else
1913             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1914 #endif
1915           }
1916         }
1917         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1918       }
1919     }
1920     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1921   } else {
1922     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1923     for (i = 0; i < a->mbs; i++) {
1924       for (j = 0; j < bs; j++) {
1925         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1926         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1927           for (l = 0; l < bs; l++) {
1928 #if defined(PETSC_USE_COMPLEX)
1929             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
1930               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1931             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
1932               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1933             } else {
1934               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1935             }
1936 #else
1937             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1938 #endif
1939           }
1940         }
1941         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1942       }
1943     }
1944     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1945   }
1946   PetscCall(PetscViewerFlush(viewer));
1947   PetscFunctionReturn(PETSC_SUCCESS);
1948 }
1949 
1950 #include <petscdraw.h>
1951 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1952 {
1953   Mat               A = (Mat)Aa;
1954   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1955   PetscInt          row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
1956   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1957   MatScalar        *aa;
1958   PetscViewer       viewer;
1959   PetscViewerFormat format;
1960   int               color;
1961 
1962   PetscFunctionBegin;
1963   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1964   PetscCall(PetscViewerGetFormat(viewer, &format));
1965   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
1966 
1967   /* loop over matrix elements drawing boxes */
1968 
1969   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1970     PetscDrawCollectiveBegin(draw);
1971     /* Blue for negative, Cyan for zero and  Red for positive */
1972     color = PETSC_DRAW_BLUE;
1973     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1974       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1975         y_l = A->rmap->N - row - 1.0;
1976         y_r = y_l + 1.0;
1977         x_l = a->j[j] * bs;
1978         x_r = x_l + 1.0;
1979         aa  = a->a + j * bs2;
1980         for (k = 0; k < bs; k++) {
1981           for (l = 0; l < bs; l++) {
1982             if (PetscRealPart(*aa++) >= 0.) continue;
1983             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1984           }
1985         }
1986       }
1987     }
1988     color = PETSC_DRAW_CYAN;
1989     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1990       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1991         y_l = A->rmap->N - row - 1.0;
1992         y_r = y_l + 1.0;
1993         x_l = a->j[j] * bs;
1994         x_r = x_l + 1.0;
1995         aa  = a->a + j * bs2;
1996         for (k = 0; k < bs; k++) {
1997           for (l = 0; l < bs; l++) {
1998             if (PetscRealPart(*aa++) != 0.) continue;
1999             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2000           }
2001         }
2002       }
2003     }
2004     color = PETSC_DRAW_RED;
2005     for (i = 0, row = 0; i < mbs; i++, row += bs) {
2006       for (j = a->i[i]; j < a->i[i + 1]; j++) {
2007         y_l = A->rmap->N - row - 1.0;
2008         y_r = y_l + 1.0;
2009         x_l = a->j[j] * bs;
2010         x_r = x_l + 1.0;
2011         aa  = a->a + j * bs2;
2012         for (k = 0; k < bs; k++) {
2013           for (l = 0; l < bs; l++) {
2014             if (PetscRealPart(*aa++) <= 0.) continue;
2015             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2016           }
2017         }
2018       }
2019     }
2020     PetscDrawCollectiveEnd(draw);
2021   } else {
2022     /* use contour shading to indicate magnitude of values */
2023     /* first determine max of all nonzero values */
2024     PetscReal minv = 0.0, maxv = 0.0;
2025     PetscDraw popup;
2026 
2027     for (i = 0; i < a->nz * a->bs2; i++) {
2028       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
2029     }
2030     if (minv >= maxv) maxv = minv + PETSC_SMALL;
2031     PetscCall(PetscDrawGetPopup(draw, &popup));
2032     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
2033 
2034     PetscDrawCollectiveBegin(draw);
2035     for (i = 0, row = 0; i < mbs; i++, row += bs) {
2036       for (j = a->i[i]; j < a->i[i + 1]; j++) {
2037         y_l = A->rmap->N - row - 1.0;
2038         y_r = y_l + 1.0;
2039         x_l = a->j[j] * bs;
2040         x_r = x_l + 1.0;
2041         aa  = a->a + j * bs2;
2042         for (k = 0; k < bs; k++) {
2043           for (l = 0; l < bs; l++) {
2044             MatScalar v = *aa++;
2045             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
2046             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2047           }
2048         }
2049       }
2050     }
2051     PetscDrawCollectiveEnd(draw);
2052   }
2053   PetscFunctionReturn(PETSC_SUCCESS);
2054 }
2055 
2056 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2057 {
2058   PetscReal xl, yl, xr, yr, w, h;
2059   PetscDraw draw;
2060   PetscBool isnull;
2061 
2062   PetscFunctionBegin;
2063   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
2064   PetscCall(PetscDrawIsNull(draw, &isnull));
2065   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
2066 
2067   xr = A->cmap->n;
2068   yr = A->rmap->N;
2069   h  = yr / 10.0;
2070   w  = xr / 10.0;
2071   xr += w;
2072   yr += h;
2073   xl = -w;
2074   yl = -h;
2075   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
2076   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
2077   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
2078   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
2079   PetscCall(PetscDrawSave(draw));
2080   PetscFunctionReturn(PETSC_SUCCESS);
2081 }
2082 
2083 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2084 {
2085   PetscBool isascii, isbinary, isdraw;
2086 
2087   PetscFunctionBegin;
2088   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
2089   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2090   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
2091   if (isascii) {
2092     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
2093   } else if (isbinary) {
2094     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
2095   } else if (isdraw) {
2096     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
2097   } else {
2098     Mat B;
2099     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2100     PetscCall(MatView(B, viewer));
2101     PetscCall(MatDestroy(&B));
2102   }
2103   PetscFunctionReturn(PETSC_SUCCESS);
2104 }
2105 
2106 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2107 {
2108   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2109   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2110   PetscInt    *ai = a->i, *ailen = a->ilen;
2111   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
2112   MatScalar   *ap, *aa = a->a;
2113 
2114   PetscFunctionBegin;
2115   for (k = 0; k < m; k++) { /* loop over rows */
2116     row  = im[k];
2117     brow = row / bs;
2118     if (row < 0) {
2119       v += n;
2120       continue;
2121     } /* negative row */
2122     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2123     rp   = PetscSafePointerPlusOffset(aj, ai[brow]);
2124     ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2125     nrow = ailen[brow];
2126     for (l = 0; l < n; l++) { /* loop over columns */
2127       if (in[l] < 0) {
2128         v++;
2129         continue;
2130       } /* negative column */
2131       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
2132       col  = in[l];
2133       bcol = col / bs;
2134       cidx = col % bs;
2135       ridx = row % bs;
2136       high = nrow;
2137       low  = 0; /* assume unsorted */
2138       while (high - low > 5) {
2139         t = (low + high) / 2;
2140         if (rp[t] > bcol) high = t;
2141         else low = t;
2142       }
2143       for (i = low; i < high; i++) {
2144         if (rp[i] > bcol) break;
2145         if (rp[i] == bcol) {
2146           *v++ = ap[bs2 * i + bs * cidx + ridx];
2147           goto finished;
2148         }
2149       }
2150       *v++ = 0.0;
2151     finished:;
2152     }
2153   }
2154   PetscFunctionReturn(PETSC_SUCCESS);
2155 }
2156 
2157 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2158 {
2159   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2160   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2161   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2162   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2163   PetscBool          roworiented = a->roworiented;
2164   const PetscScalar *value       = v;
2165   MatScalar         *ap = NULL, *aa = a->a, *bap;
2166 
2167   PetscFunctionBegin;
2168   if (roworiented) {
2169     stepval = (n - 1) * bs;
2170   } else {
2171     stepval = (m - 1) * bs;
2172   }
2173   for (k = 0; k < m; k++) { /* loop over added rows */
2174     row = im[k];
2175     if (row < 0) continue;
2176     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
2177     rp = aj + ai[row];
2178     if (!A->structure_only) ap = aa + bs2 * ai[row];
2179     rmax = imax[row];
2180     nrow = ailen[row];
2181     low  = 0;
2182     high = nrow;
2183     for (l = 0; l < n; l++) { /* loop over added columns */
2184       if (in[l] < 0) continue;
2185       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
2186       col = in[l];
2187       if (!A->structure_only) {
2188         if (roworiented) {
2189           value = v + (k * (stepval + bs) + l) * bs;
2190         } else {
2191           value = v + (l * (stepval + bs) + k) * bs;
2192         }
2193       }
2194       if (col <= lastcol) low = 0;
2195       else high = nrow;
2196       lastcol = col;
2197       while (high - low > 7) {
2198         t = (low + high) / 2;
2199         if (rp[t] > col) high = t;
2200         else low = t;
2201       }
2202       for (i = low; i < high; i++) {
2203         if (rp[i] > col) break;
2204         if (rp[i] == col) {
2205           if (A->structure_only) goto noinsert2;
2206           bap = ap + bs2 * i;
2207           if (roworiented) {
2208             if (is == ADD_VALUES) {
2209               for (ii = 0; ii < bs; ii++, value += stepval) {
2210                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2211               }
2212             } else {
2213               for (ii = 0; ii < bs; ii++, value += stepval) {
2214                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2215               }
2216             }
2217           } else {
2218             if (is == ADD_VALUES) {
2219               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2220                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
2221                 bap += bs;
2222               }
2223             } else {
2224               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2225                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
2226                 bap += bs;
2227               }
2228             }
2229           }
2230           goto noinsert2;
2231         }
2232       }
2233       if (nonew == 1) goto noinsert2;
2234       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2235       if (A->structure_only) {
2236         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
2237       } else {
2238         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2239       }
2240       N = nrow++ - 1;
2241       high++;
2242       /* shift up all the later entries in this row */
2243       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2244       rp[i] = col;
2245       if (!A->structure_only) {
2246         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2247         bap = ap + bs2 * i;
2248         if (roworiented) {
2249           for (ii = 0; ii < bs; ii++, value += stepval) {
2250             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2251           }
2252         } else {
2253           for (ii = 0; ii < bs; ii++, value += stepval) {
2254             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2255           }
2256         }
2257       }
2258     noinsert2:;
2259       low = i;
2260     }
2261     ailen[row] = nrow;
2262   }
2263   PetscFunctionReturn(PETSC_SUCCESS);
2264 }
2265 
2266 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2267 {
2268   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2269   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2270   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2271   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2272   MatScalar   *aa    = a->a, *ap;
2273   PetscReal    ratio = 0.6;
2274 
2275   PetscFunctionBegin;
2276   if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS);
2277 
2278   if (m) rmax = ailen[0];
2279   for (i = 1; i < mbs; i++) {
2280     /* move each row back by the amount of empty slots (fshift) before it*/
2281     fshift += imax[i - 1] - ailen[i - 1];
2282     rmax = PetscMax(rmax, ailen[i]);
2283     if (fshift) {
2284       ip = aj + ai[i];
2285       ap = aa + bs2 * ai[i];
2286       N  = ailen[i];
2287       PetscCall(PetscArraymove(ip - fshift, ip, N));
2288       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2289     }
2290     ai[i] = ai[i - 1] + ailen[i - 1];
2291   }
2292   if (mbs) {
2293     fshift += imax[mbs - 1] - ailen[mbs - 1];
2294     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2295   }
2296 
2297   /* reset ilen and imax for each row */
2298   a->nonzerorowcnt = 0;
2299   if (A->structure_only) {
2300     PetscCall(PetscFree2(a->imax, a->ilen));
2301   } else { /* !A->structure_only */
2302     for (i = 0; i < mbs; i++) {
2303       ailen[i] = imax[i] = ai[i + 1] - ai[i];
2304       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2305     }
2306   }
2307   a->nz = ai[mbs];
2308 
2309   /* diagonals may have moved, so kill the diagonal pointers */
2310   a->idiagvalid = PETSC_FALSE;
2311   if (fshift && a->diag) PetscCall(PetscFree(a->diag));
2312   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
2313   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
2314   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
2315   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
2316 
2317   A->info.mallocs += a->reallocs;
2318   a->reallocs         = 0;
2319   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2320   a->rmax             = rmax;
2321 
2322   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
2323   PetscFunctionReturn(PETSC_SUCCESS);
2324 }
2325 
2326 /*
2327    This function returns an array of flags which indicate the locations of contiguous
2328    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2329    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2330    Assume: sizes should be long enough to hold all the values.
2331 */
2332 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2333 {
2334   PetscInt j = 0;
2335 
2336   PetscFunctionBegin;
2337   for (PetscInt i = 0; i < n; j++) {
2338     PetscInt row = idx[i];
2339     if (row % bs != 0) { /* Not the beginning of a block */
2340       sizes[j] = 1;
2341       i++;
2342     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2343       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2344       i++;
2345     } else { /* Beginning of the block, so check if the complete block exists */
2346       PetscBool flg = PETSC_TRUE;
2347       for (PetscInt k = 1; k < bs; k++) {
2348         if (row + k != idx[i + k]) { /* break in the block */
2349           flg = PETSC_FALSE;
2350           break;
2351         }
2352       }
2353       if (flg) { /* No break in the bs */
2354         sizes[j] = bs;
2355         i += bs;
2356       } else {
2357         sizes[j] = 1;
2358         i++;
2359       }
2360     }
2361   }
2362   *bs_max = j;
2363   PetscFunctionReturn(PETSC_SUCCESS);
2364 }
2365 
2366 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2367 {
2368   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2369   PetscInt           i, j, k, count, *rows;
2370   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
2371   PetscScalar        zero = 0.0;
2372   MatScalar         *aa;
2373   const PetscScalar *xx;
2374   PetscScalar       *bb;
2375 
2376   PetscFunctionBegin;
2377   /* fix right-hand side if needed */
2378   if (x && b) {
2379     PetscCall(VecGetArrayRead(x, &xx));
2380     PetscCall(VecGetArray(b, &bb));
2381     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
2382     PetscCall(VecRestoreArrayRead(x, &xx));
2383     PetscCall(VecRestoreArray(b, &bb));
2384   }
2385 
2386   /* Make a copy of the IS and  sort it */
2387   /* allocate memory for rows,sizes */
2388   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2389 
2390   /* copy IS values to rows, and sort them */
2391   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
2392   PetscCall(PetscSortInt(is_n, rows));
2393 
2394   if (baij->keepnonzeropattern) {
2395     for (i = 0; i < is_n; i++) sizes[i] = 1;
2396     bs_max = is_n;
2397   } else {
2398     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2399     A->nonzerostate++;
2400   }
2401 
2402   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2403     row = rows[j];
2404     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2405     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2406     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2407     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2408       if (diag != (PetscScalar)0.0) {
2409         if (baij->ilen[row / bs] > 0) {
2410           baij->ilen[row / bs]       = 1;
2411           baij->j[baij->i[row / bs]] = row / bs;
2412 
2413           PetscCall(PetscArrayzero(aa, count * bs));
2414         }
2415         /* Now insert all the diagonal values for this bs */
2416         for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES);
2417       } else { /* (diag == 0.0) */
2418         baij->ilen[row / bs] = 0;
2419       } /* end (diag == 0.0) */
2420     } else { /* (sizes[i] != bs) */
2421       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2422       for (k = 0; k < count; k++) {
2423         aa[0] = zero;
2424         aa += bs;
2425       }
2426       if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES);
2427     }
2428   }
2429 
2430   PetscCall(PetscFree2(rows, sizes));
2431   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2432   PetscFunctionReturn(PETSC_SUCCESS);
2433 }
2434 
2435 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2436 {
2437   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2438   PetscInt           i, j, k, count;
2439   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
2440   PetscScalar        zero = 0.0;
2441   MatScalar         *aa;
2442   const PetscScalar *xx;
2443   PetscScalar       *bb;
2444   PetscBool         *zeroed, vecs = PETSC_FALSE;
2445 
2446   PetscFunctionBegin;
2447   /* fix right-hand side if needed */
2448   if (x && b) {
2449     PetscCall(VecGetArrayRead(x, &xx));
2450     PetscCall(VecGetArray(b, &bb));
2451     vecs = PETSC_TRUE;
2452   }
2453 
2454   /* zero the columns */
2455   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
2456   for (i = 0; i < is_n; i++) {
2457     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
2458     zeroed[is_idx[i]] = PETSC_TRUE;
2459   }
2460   for (i = 0; i < A->rmap->N; i++) {
2461     if (!zeroed[i]) {
2462       row = i / bs;
2463       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
2464         for (k = 0; k < bs; k++) {
2465           col = bs * baij->j[j] + k;
2466           if (zeroed[col]) {
2467             aa = baij->a + j * bs2 + (i % bs) + bs * k;
2468             if (vecs) bb[i] -= aa[0] * xx[col];
2469             aa[0] = 0.0;
2470           }
2471         }
2472       }
2473     } else if (vecs) bb[i] = diag * xx[i];
2474   }
2475   PetscCall(PetscFree(zeroed));
2476   if (vecs) {
2477     PetscCall(VecRestoreArrayRead(x, &xx));
2478     PetscCall(VecRestoreArray(b, &bb));
2479   }
2480 
2481   /* zero the rows */
2482   for (i = 0; i < is_n; i++) {
2483     row   = is_idx[i];
2484     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2485     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2486     for (k = 0; k < count; k++) {
2487       aa[0] = zero;
2488       aa += bs;
2489     }
2490     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
2491   }
2492   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2493   PetscFunctionReturn(PETSC_SUCCESS);
2494 }
2495 
2496 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2497 {
2498   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2499   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2500   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2501   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2502   PetscInt     ridx, cidx, bs2                 = a->bs2;
2503   PetscBool    roworiented = a->roworiented;
2504   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
2505 
2506   PetscFunctionBegin;
2507   for (k = 0; k < m; k++) { /* loop over added rows */
2508     row  = im[k];
2509     brow = row / bs;
2510     if (row < 0) continue;
2511     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
2512     rp = PetscSafePointerPlusOffset(aj, ai[brow]);
2513     if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2514     rmax = imax[brow];
2515     nrow = ailen[brow];
2516     low  = 0;
2517     high = nrow;
2518     for (l = 0; l < n; l++) { /* loop over added columns */
2519       if (in[l] < 0) continue;
2520       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
2521       col  = in[l];
2522       bcol = col / bs;
2523       ridx = row % bs;
2524       cidx = col % bs;
2525       if (!A->structure_only) {
2526         if (roworiented) {
2527           value = v[l + k * n];
2528         } else {
2529           value = v[k + l * m];
2530         }
2531       }
2532       if (col <= lastcol) low = 0;
2533       else high = nrow;
2534       lastcol = col;
2535       while (high - low > 7) {
2536         t = (low + high) / 2;
2537         if (rp[t] > bcol) high = t;
2538         else low = t;
2539       }
2540       for (i = low; i < high; i++) {
2541         if (rp[i] > bcol) break;
2542         if (rp[i] == bcol) {
2543           bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx);
2544           if (!A->structure_only) {
2545             if (is == ADD_VALUES) *bap += value;
2546             else *bap = value;
2547           }
2548           goto noinsert1;
2549         }
2550       }
2551       if (nonew == 1) goto noinsert1;
2552       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2553       if (A->structure_only) {
2554         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2555       } else {
2556         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2557       }
2558       N = nrow++ - 1;
2559       high++;
2560       /* shift up all the later entries in this row */
2561       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2562       rp[i] = bcol;
2563       if (!A->structure_only) {
2564         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2565         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2566         ap[bs2 * i + bs * cidx + ridx] = value;
2567       }
2568       a->nz++;
2569     noinsert1:;
2570       low = i;
2571     }
2572     ailen[brow] = nrow;
2573   }
2574   PetscFunctionReturn(PETSC_SUCCESS);
2575 }
2576 
2577 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2578 {
2579   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
2580   Mat          outA;
2581   PetscBool    row_identity, col_identity;
2582 
2583   PetscFunctionBegin;
2584   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
2585   PetscCall(ISIdentity(row, &row_identity));
2586   PetscCall(ISIdentity(col, &col_identity));
2587   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
2588 
2589   outA            = inA;
2590   inA->factortype = MAT_FACTOR_LU;
2591   PetscCall(PetscFree(inA->solvertype));
2592   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
2593 
2594   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2595 
2596   PetscCall(PetscObjectReference((PetscObject)row));
2597   PetscCall(ISDestroy(&a->row));
2598   a->row = row;
2599   PetscCall(PetscObjectReference((PetscObject)col));
2600   PetscCall(ISDestroy(&a->col));
2601   a->col = col;
2602 
2603   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2604   PetscCall(ISDestroy(&a->icol));
2605   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2606 
2607   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2608   if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
2609   PetscCall(MatLUFactorNumeric(outA, inA, info));
2610   PetscFunctionReturn(PETSC_SUCCESS);
2611 }
2612 
2613 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2614 {
2615   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2616 
2617   PetscFunctionBegin;
2618   baij->nz = baij->maxnz;
2619   PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2620   PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
2621   PetscFunctionReturn(PETSC_SUCCESS);
2622 }
2623 
2624 /*@
2625   MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix.
2626 
2627   Input Parameters:
2628 + mat     - the `MATSEQBAIJ` matrix
2629 - indices - the block column indices
2630 
2631   Level: advanced
2632 
2633   Notes:
2634   This can be called if you have precomputed the nonzero structure of the
2635   matrix and want to provide it to the matrix object to improve the performance
2636   of the `MatSetValues()` operation.
2637 
2638   You MUST have set the correct numbers of nonzeros per row in the call to
2639   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
2640 
2641   MUST be called before any calls to `MatSetValues()`
2642 
2643 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()`
2644 @*/
2645 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2646 {
2647   PetscFunctionBegin;
2648   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2649   PetscAssertPointer(indices, 2);
2650   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices));
2651   PetscFunctionReturn(PETSC_SUCCESS);
2652 }
2653 
2654 static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2655 {
2656   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2657   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2658   PetscReal    atmp;
2659   PetscScalar *x, zero = 0.0;
2660   MatScalar   *aa;
2661   PetscInt     ncols, brow, krow, kcol;
2662 
2663   PetscFunctionBegin;
2664   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2665   bs  = A->rmap->bs;
2666   aa  = a->a;
2667   ai  = a->i;
2668   aj  = a->j;
2669   mbs = a->mbs;
2670 
2671   PetscCall(VecSet(v, zero));
2672   PetscCall(VecGetArray(v, &x));
2673   PetscCall(VecGetLocalSize(v, &n));
2674   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2675   for (i = 0; i < mbs; i++) {
2676     ncols = ai[1] - ai[0];
2677     ai++;
2678     brow = bs * i;
2679     for (j = 0; j < ncols; j++) {
2680       for (kcol = 0; kcol < bs; kcol++) {
2681         for (krow = 0; krow < bs; krow++) {
2682           atmp = PetscAbsScalar(*aa);
2683           aa++;
2684           row = brow + krow; /* row index */
2685           if (PetscAbsScalar(x[row]) < atmp) {
2686             x[row] = atmp;
2687             if (idx) idx[row] = bs * (*aj) + kcol;
2688           }
2689         }
2690       }
2691       aj++;
2692     }
2693   }
2694   PetscCall(VecRestoreArray(v, &x));
2695   PetscFunctionReturn(PETSC_SUCCESS);
2696 }
2697 
2698 static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v)
2699 {
2700   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2701   PetscInt     i, j, n, row, bs, *ai, mbs;
2702   PetscReal    atmp;
2703   PetscScalar *x, zero = 0.0;
2704   MatScalar   *aa;
2705   PetscInt     ncols, brow, krow, kcol;
2706 
2707   PetscFunctionBegin;
2708   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2709   bs  = A->rmap->bs;
2710   aa  = a->a;
2711   ai  = a->i;
2712   mbs = a->mbs;
2713 
2714   PetscCall(VecSet(v, zero));
2715   PetscCall(VecGetArrayWrite(v, &x));
2716   PetscCall(VecGetLocalSize(v, &n));
2717   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2718   for (i = 0; i < mbs; i++) {
2719     ncols = ai[1] - ai[0];
2720     ai++;
2721     brow = bs * i;
2722     for (j = 0; j < ncols; j++) {
2723       for (kcol = 0; kcol < bs; kcol++) {
2724         for (krow = 0; krow < bs; krow++) {
2725           atmp = PetscAbsScalar(*aa);
2726           aa++;
2727           row = brow + krow; /* row index */
2728           x[row] += atmp;
2729         }
2730       }
2731     }
2732   }
2733   PetscCall(VecRestoreArrayWrite(v, &x));
2734   PetscFunctionReturn(PETSC_SUCCESS);
2735 }
2736 
2737 static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2738 {
2739   PetscFunctionBegin;
2740   /* If the two matrices have the same copy implementation, use fast copy. */
2741   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2742     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
2743     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2744     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
2745 
2746     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
2747     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
2748     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
2749     PetscCall(PetscObjectStateIncrease((PetscObject)B));
2750   } else {
2751     PetscCall(MatCopy_Basic(A, B, str));
2752   }
2753   PetscFunctionReturn(PETSC_SUCCESS);
2754 }
2755 
2756 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2757 {
2758   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2759 
2760   PetscFunctionBegin;
2761   *array = a->a;
2762   PetscFunctionReturn(PETSC_SUCCESS);
2763 }
2764 
2765 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2766 {
2767   PetscFunctionBegin;
2768   *array = NULL;
2769   PetscFunctionReturn(PETSC_SUCCESS);
2770 }
2771 
2772 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2773 {
2774   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
2775   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
2776   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
2777 
2778   PetscFunctionBegin;
2779   /* Set the number of nonzeros in the new matrix */
2780   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
2781   PetscFunctionReturn(PETSC_SUCCESS);
2782 }
2783 
2784 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2785 {
2786   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
2787   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2788   PetscBLASInt one = 1;
2789 
2790   PetscFunctionBegin;
2791   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2792     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2793     if (e) {
2794       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2795       if (e) {
2796         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2797         if (e) str = SAME_NONZERO_PATTERN;
2798       }
2799     }
2800     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2801   }
2802   if (str == SAME_NONZERO_PATTERN) {
2803     PetscScalar  alpha = a;
2804     PetscBLASInt bnz;
2805     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2806     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
2807     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2808   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2809     PetscCall(MatAXPY_Basic(Y, a, X, str));
2810   } else {
2811     Mat       B;
2812     PetscInt *nnz;
2813     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
2814     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
2815     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2816     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2817     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
2818     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
2819     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
2820     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
2821     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
2822     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2823     PetscCall(MatHeaderMerge(Y, &B));
2824     PetscCall(PetscFree(nnz));
2825   }
2826   PetscFunctionReturn(PETSC_SUCCESS);
2827 }
2828 
2829 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2830 {
2831 #if PetscDefined(USE_COMPLEX)
2832   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2833   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2834   MatScalar   *aa = a->a;
2835 
2836   PetscFunctionBegin;
2837   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
2838   PetscFunctionReturn(PETSC_SUCCESS);
2839 #else
2840   (void)A;
2841   return PETSC_SUCCESS;
2842 #endif
2843 }
2844 
2845 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2846 {
2847 #if PetscDefined(USE_COMPLEX)
2848   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2849   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2850   MatScalar   *aa = a->a;
2851 
2852   PetscFunctionBegin;
2853   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
2854   PetscFunctionReturn(PETSC_SUCCESS);
2855 #else
2856   (void)A;
2857   return PETSC_SUCCESS;
2858 #endif
2859 }
2860 
2861 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2862 {
2863 #if PetscDefined(USE_COMPLEX)
2864   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2865   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2866   MatScalar   *aa = a->a;
2867 
2868   PetscFunctionBegin;
2869   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2870   PetscFunctionReturn(PETSC_SUCCESS);
2871 #else
2872   (void)A;
2873   return PETSC_SUCCESS;
2874 #endif
2875 }
2876 
2877 /*
2878     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2879 */
2880 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2881 {
2882   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
2883   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
2884   PetscInt     nz = a->i[m], row, *jj, mr, col;
2885 
2886   PetscFunctionBegin;
2887   *nn = n;
2888   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2889   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
2890   PetscCall(PetscCalloc1(n, &collengths));
2891   PetscCall(PetscMalloc1(n + 1, &cia));
2892   PetscCall(PetscMalloc1(nz, &cja));
2893   jj = a->j;
2894   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2895   cia[0] = oshift;
2896   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2897   PetscCall(PetscArrayzero(collengths, n));
2898   jj = a->j;
2899   for (row = 0; row < m; row++) {
2900     mr = a->i[row + 1] - a->i[row];
2901     for (i = 0; i < mr; i++) {
2902       col = *jj++;
2903 
2904       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2905     }
2906   }
2907   PetscCall(PetscFree(collengths));
2908   *ia = cia;
2909   *ja = cja;
2910   PetscFunctionReturn(PETSC_SUCCESS);
2911 }
2912 
2913 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2914 {
2915   PetscFunctionBegin;
2916   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2917   PetscCall(PetscFree(*ia));
2918   PetscCall(PetscFree(*ja));
2919   PetscFunctionReturn(PETSC_SUCCESS);
2920 }
2921 
2922 /*
2923  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2924  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2925  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2926  */
2927 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2928 {
2929   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2930   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2931   PetscInt     nz = a->i[m], row, *jj, mr, col;
2932   PetscInt    *cspidx;
2933 
2934   PetscFunctionBegin;
2935   *nn = n;
2936   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2937 
2938   PetscCall(PetscCalloc1(n, &collengths));
2939   PetscCall(PetscMalloc1(n + 1, &cia));
2940   PetscCall(PetscMalloc1(nz, &cja));
2941   PetscCall(PetscMalloc1(nz, &cspidx));
2942   jj = a->j;
2943   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2944   cia[0] = oshift;
2945   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2946   PetscCall(PetscArrayzero(collengths, n));
2947   jj = a->j;
2948   for (row = 0; row < m; row++) {
2949     mr = a->i[row + 1] - a->i[row];
2950     for (i = 0; i < mr; i++) {
2951       col                                         = *jj++;
2952       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2953       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2954     }
2955   }
2956   PetscCall(PetscFree(collengths));
2957   *ia    = cia;
2958   *ja    = cja;
2959   *spidx = cspidx;
2960   PetscFunctionReturn(PETSC_SUCCESS);
2961 }
2962 
2963 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2964 {
2965   PetscFunctionBegin;
2966   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
2967   PetscCall(PetscFree(*spidx));
2968   PetscFunctionReturn(PETSC_SUCCESS);
2969 }
2970 
2971 static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2972 {
2973   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
2974 
2975   PetscFunctionBegin;
2976   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
2977   PetscCall(MatShift_Basic(Y, a));
2978   PetscFunctionReturn(PETSC_SUCCESS);
2979 }
2980 
2981 PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep)
2982 {
2983   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2984   PetscInt     fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k;
2985   PetscInt     m = A->rmap->N, *ailen = a->ilen;
2986   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2987   MatScalar   *aa = a->a, *ap;
2988   PetscBool    zero;
2989 
2990   PetscFunctionBegin;
2991   PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix");
2992   if (m) rmax = ailen[0];
2993   for (i = 1; i <= mbs; i++) {
2994     for (k = ai[i - 1]; k < ai[i]; k++) {
2995       zero = PETSC_TRUE;
2996       ap   = aa + bs2 * k;
2997       for (j = 0; j < bs2 && zero; j++) {
2998         if (ap[j] != 0.0) zero = PETSC_FALSE;
2999       }
3000       if (zero && (aj[k] != i - 1 || !keep)) fshift++;
3001       else {
3002         if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1));
3003         aj[k - fshift] = aj[k];
3004         PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2));
3005       }
3006     }
3007     ai[i - 1] -= fshift_prev;
3008     fshift_prev  = fshift;
3009     ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1];
3010     a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0);
3011     rmax = PetscMax(rmax, ailen[i - 1]);
3012   }
3013   if (fshift) {
3014     if (mbs) {
3015       ai[mbs] -= fshift;
3016       a->nz = ai[mbs];
3017     }
3018     PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz));
3019     A->nonzerostate++;
3020     A->info.nz_unneeded += (PetscReal)fshift;
3021     a->rmax = rmax;
3022     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
3023     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
3024   }
3025   PetscFunctionReturn(PETSC_SUCCESS);
3026 }
3027 
3028 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
3029                                        MatGetRow_SeqBAIJ,
3030                                        MatRestoreRow_SeqBAIJ,
3031                                        MatMult_SeqBAIJ_N,
3032                                        /* 4*/ MatMultAdd_SeqBAIJ_N,
3033                                        MatMultTranspose_SeqBAIJ,
3034                                        MatMultTransposeAdd_SeqBAIJ,
3035                                        NULL,
3036                                        NULL,
3037                                        NULL,
3038                                        /* 10*/ NULL,
3039                                        MatLUFactor_SeqBAIJ,
3040                                        NULL,
3041                                        NULL,
3042                                        MatTranspose_SeqBAIJ,
3043                                        /* 15*/ MatGetInfo_SeqBAIJ,
3044                                        MatEqual_SeqBAIJ,
3045                                        MatGetDiagonal_SeqBAIJ,
3046                                        MatDiagonalScale_SeqBAIJ,
3047                                        MatNorm_SeqBAIJ,
3048                                        /* 20*/ NULL,
3049                                        MatAssemblyEnd_SeqBAIJ,
3050                                        MatSetOption_SeqBAIJ,
3051                                        MatZeroEntries_SeqBAIJ,
3052                                        /* 24*/ MatZeroRows_SeqBAIJ,
3053                                        NULL,
3054                                        NULL,
3055                                        NULL,
3056                                        NULL,
3057                                        /* 29*/ MatSetUp_Seq_Hash,
3058                                        NULL,
3059                                        NULL,
3060                                        NULL,
3061                                        NULL,
3062                                        /* 34*/ MatDuplicate_SeqBAIJ,
3063                                        NULL,
3064                                        NULL,
3065                                        MatILUFactor_SeqBAIJ,
3066                                        NULL,
3067                                        /* 39*/ MatAXPY_SeqBAIJ,
3068                                        MatCreateSubMatrices_SeqBAIJ,
3069                                        MatIncreaseOverlap_SeqBAIJ,
3070                                        MatGetValues_SeqBAIJ,
3071                                        MatCopy_SeqBAIJ,
3072                                        /* 44*/ NULL,
3073                                        MatScale_SeqBAIJ,
3074                                        MatShift_SeqBAIJ,
3075                                        NULL,
3076                                        MatZeroRowsColumns_SeqBAIJ,
3077                                        /* 49*/ NULL,
3078                                        MatGetRowIJ_SeqBAIJ,
3079                                        MatRestoreRowIJ_SeqBAIJ,
3080                                        MatGetColumnIJ_SeqBAIJ,
3081                                        MatRestoreColumnIJ_SeqBAIJ,
3082                                        /* 54*/ MatFDColoringCreate_SeqXAIJ,
3083                                        NULL,
3084                                        NULL,
3085                                        NULL,
3086                                        MatSetValuesBlocked_SeqBAIJ,
3087                                        /* 59*/ MatCreateSubMatrix_SeqBAIJ,
3088                                        MatDestroy_SeqBAIJ,
3089                                        MatView_SeqBAIJ,
3090                                        NULL,
3091                                        NULL,
3092                                        /* 64*/ NULL,
3093                                        NULL,
3094                                        NULL,
3095                                        NULL,
3096                                        MatGetRowMaxAbs_SeqBAIJ,
3097                                        /* 69*/ NULL,
3098                                        MatConvert_Basic,
3099                                        NULL,
3100                                        MatFDColoringApply_BAIJ,
3101                                        NULL,
3102                                        /* 74*/ NULL,
3103                                        NULL,
3104                                        NULL,
3105                                        NULL,
3106                                        MatLoad_SeqBAIJ,
3107                                        /* 79*/ NULL,
3108                                        NULL,
3109                                        NULL,
3110                                        NULL,
3111                                        NULL,
3112                                        /* 84*/ NULL,
3113                                        NULL,
3114                                        NULL,
3115                                        NULL,
3116                                        NULL,
3117                                        /* 89*/ NULL,
3118                                        NULL,
3119                                        NULL,
3120                                        NULL,
3121                                        MatConjugate_SeqBAIJ,
3122                                        /* 94*/ NULL,
3123                                        NULL,
3124                                        MatRealPart_SeqBAIJ,
3125                                        MatImaginaryPart_SeqBAIJ,
3126                                        NULL,
3127                                        /* 99*/ NULL,
3128                                        NULL,
3129                                        NULL,
3130                                        NULL,
3131                                        NULL,
3132                                        /*104*/ MatMissingDiagonal_SeqBAIJ,
3133                                        NULL,
3134                                        NULL,
3135                                        NULL,
3136                                        NULL,
3137                                        /*109*/ NULL,
3138                                        NULL,
3139                                        NULL,
3140                                        MatMultHermitianTranspose_SeqBAIJ,
3141                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3142                                        /*114*/ NULL,
3143                                        NULL,
3144                                        MatGetColumnReductions_SeqBAIJ,
3145                                        MatInvertBlockDiagonal_SeqBAIJ,
3146                                        NULL,
3147                                        /*119*/ NULL,
3148                                        NULL,
3149                                        NULL,
3150                                        NULL,
3151                                        NULL,
3152                                        /*124*/ NULL,
3153                                        NULL,
3154                                        NULL,
3155                                        MatSetBlockSizes_Default,
3156                                        NULL,
3157                                        /*129*/ MatFDColoringSetUp_SeqXAIJ,
3158                                        NULL,
3159                                        MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3160                                        MatDestroySubMatrices_SeqBAIJ,
3161                                        NULL,
3162                                        /*134*/ NULL,
3163                                        NULL,
3164                                        NULL,
3165                                        MatEliminateZeros_SeqBAIJ,
3166                                        MatGetRowSumAbs_SeqBAIJ,
3167                                        /*139*/ NULL,
3168                                        NULL,
3169                                        NULL,
3170                                        MatCopyHashToXAIJ_Seq_Hash,
3171                                        NULL,
3172                                        NULL};
3173 
3174 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3175 {
3176   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3177   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3178 
3179   PetscFunctionBegin;
3180   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3181 
3182   /* allocate space for values if not already there */
3183   if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
3184 
3185   /* copy values over */
3186   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
3187   PetscFunctionReturn(PETSC_SUCCESS);
3188 }
3189 
3190 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3191 {
3192   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3193   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3194 
3195   PetscFunctionBegin;
3196   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3197   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
3198 
3199   /* copy values over */
3200   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
3201   PetscFunctionReturn(PETSC_SUCCESS);
3202 }
3203 
3204 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3205 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3206 
3207 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3208 {
3209   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
3210   PetscInt     i, mbs, nbs, bs2;
3211   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3212 
3213   PetscFunctionBegin;
3214   if (B->hash_active) {
3215     PetscInt bs;
3216     B->ops[0] = b->cops;
3217     PetscCall(PetscHMapIJVDestroy(&b->ht));
3218     PetscCall(MatGetBlockSize(B, &bs));
3219     if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht));
3220     PetscCall(PetscFree(b->dnz));
3221     PetscCall(PetscFree(b->bdnz));
3222     B->hash_active = PETSC_FALSE;
3223   }
3224   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3225   if (nz == MAT_SKIP_ALLOCATION) {
3226     skipallocation = PETSC_TRUE;
3227     nz             = 0;
3228   }
3229 
3230   PetscCall(MatSetBlockSize(B, bs));
3231   PetscCall(PetscLayoutSetUp(B->rmap));
3232   PetscCall(PetscLayoutSetUp(B->cmap));
3233   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3234 
3235   B->preallocated = PETSC_TRUE;
3236 
3237   mbs = B->rmap->n / bs;
3238   nbs = B->cmap->n / bs;
3239   bs2 = bs * bs;
3240 
3241   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3242 
3243   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3244   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3245   if (nnz) {
3246     for (i = 0; i < mbs; i++) {
3247       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
3248       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3249     }
3250   }
3251 
3252   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
3253   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3254   PetscOptionsEnd();
3255 
3256   if (!flg) {
3257     switch (bs) {
3258     case 1:
3259       B->ops->mult    = MatMult_SeqBAIJ_1;
3260       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3261       break;
3262     case 2:
3263       B->ops->mult    = MatMult_SeqBAIJ_2;
3264       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3265       break;
3266     case 3:
3267       B->ops->mult    = MatMult_SeqBAIJ_3;
3268       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3269       break;
3270     case 4:
3271       B->ops->mult    = MatMult_SeqBAIJ_4;
3272       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3273       break;
3274     case 5:
3275       B->ops->mult    = MatMult_SeqBAIJ_5;
3276       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3277       break;
3278     case 6:
3279       B->ops->mult    = MatMult_SeqBAIJ_6;
3280       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3281       break;
3282     case 7:
3283       B->ops->mult    = MatMult_SeqBAIJ_7;
3284       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3285       break;
3286     case 9: {
3287       PetscInt version = 1;
3288       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3289       switch (version) {
3290 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3291       case 1:
3292         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
3293         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3294         PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3295         break;
3296 #endif
3297       default:
3298         B->ops->mult    = MatMult_SeqBAIJ_N;
3299         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3300         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3301         break;
3302       }
3303       break;
3304     }
3305     case 11:
3306       B->ops->mult    = MatMult_SeqBAIJ_11;
3307       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3308       break;
3309     case 12: {
3310       PetscInt version = 1;
3311       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3312       switch (version) {
3313       case 1:
3314         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
3315         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3316         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3317         break;
3318       case 2:
3319         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
3320         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3321         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3322         break;
3323 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3324       case 3:
3325         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
3326         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3327         PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3328         break;
3329 #endif
3330       default:
3331         B->ops->mult    = MatMult_SeqBAIJ_N;
3332         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3333         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3334         break;
3335       }
3336       break;
3337     }
3338     case 15: {
3339       PetscInt version = 1;
3340       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3341       switch (version) {
3342       case 1:
3343         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3344         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3345         break;
3346       case 2:
3347         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3348         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3349         break;
3350       case 3:
3351         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3352         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3353         break;
3354       case 4:
3355         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3356         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3357         break;
3358       default:
3359         B->ops->mult = MatMult_SeqBAIJ_N;
3360         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3361         break;
3362       }
3363       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3364       break;
3365     }
3366     default:
3367       B->ops->mult    = MatMult_SeqBAIJ_N;
3368       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3369       PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3370       break;
3371     }
3372   }
3373   B->ops->sor = MatSOR_SeqBAIJ;
3374   b->mbs      = mbs;
3375   b->nbs      = nbs;
3376   if (!skipallocation) {
3377     if (!b->imax) {
3378       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
3379 
3380       b->free_imax_ilen = PETSC_TRUE;
3381     }
3382     /* b->ilen will count nonzeros in each block row so far. */
3383     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3384     if (!nnz) {
3385       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3386       else if (nz < 0) nz = 1;
3387       nz = PetscMin(nz, nbs);
3388       for (i = 0; i < mbs; i++) b->imax[i] = nz;
3389       PetscCall(PetscIntMultError(nz, mbs, &nz));
3390     } else {
3391       PetscInt64 nz64 = 0;
3392       for (i = 0; i < mbs; i++) {
3393         b->imax[i] = nnz[i];
3394         nz64 += nnz[i];
3395       }
3396       PetscCall(PetscIntCast(nz64, &nz));
3397     }
3398 
3399     /* allocate the matrix space */
3400     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3401     PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j));
3402     PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i));
3403     if (B->structure_only) {
3404       b->free_a = PETSC_FALSE;
3405     } else {
3406       PetscInt nzbs2 = 0;
3407       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
3408       PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a));
3409       b->free_a = PETSC_TRUE;
3410       PetscCall(PetscArrayzero(b->a, nz * bs2));
3411     }
3412     b->free_ij = PETSC_TRUE;
3413     PetscCall(PetscArrayzero(b->j, nz));
3414 
3415     b->i[0] = 0;
3416     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3417   } else {
3418     b->free_a  = PETSC_FALSE;
3419     b->free_ij = PETSC_FALSE;
3420   }
3421 
3422   b->bs2              = bs2;
3423   b->mbs              = mbs;
3424   b->nz               = 0;
3425   b->maxnz            = nz;
3426   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3427   B->was_assembled    = PETSC_FALSE;
3428   B->assembled        = PETSC_FALSE;
3429   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3430   PetscFunctionReturn(PETSC_SUCCESS);
3431 }
3432 
3433 static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3434 {
3435   PetscInt     i, m, nz, nz_max = 0, *nnz;
3436   PetscScalar *values      = NULL;
3437   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3438 
3439   PetscFunctionBegin;
3440   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
3441   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
3442   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
3443   PetscCall(PetscLayoutSetUp(B->rmap));
3444   PetscCall(PetscLayoutSetUp(B->cmap));
3445   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3446   m = B->rmap->n / bs;
3447 
3448   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
3449   PetscCall(PetscMalloc1(m + 1, &nnz));
3450   for (i = 0; i < m; i++) {
3451     nz = ii[i + 1] - ii[i];
3452     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3453     nz_max = PetscMax(nz_max, nz);
3454     nnz[i] = nz;
3455   }
3456   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
3457   PetscCall(PetscFree(nnz));
3458 
3459   values = (PetscScalar *)V;
3460   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3461   for (i = 0; i < m; i++) {
3462     PetscInt        ncols = ii[i + 1] - ii[i];
3463     const PetscInt *icols = jj + ii[i];
3464     if (bs == 1 || !roworiented) {
3465       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
3466       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
3467     } else {
3468       PetscInt j;
3469       for (j = 0; j < ncols; j++) {
3470         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
3471         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
3472       }
3473     }
3474   }
3475   if (!V) PetscCall(PetscFree(values));
3476   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3477   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3478   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3479   PetscFunctionReturn(PETSC_SUCCESS);
3480 }
3481 
3482 /*@C
3483   MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3484 
3485   Not Collective
3486 
3487   Input Parameter:
3488 . A - a `MATSEQBAIJ` matrix
3489 
3490   Output Parameter:
3491 . array - pointer to the data
3492 
3493   Level: intermediate
3494 
3495 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3496 @*/
3497 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[])
3498 {
3499   PetscFunctionBegin;
3500   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
3501   PetscFunctionReturn(PETSC_SUCCESS);
3502 }
3503 
3504 /*@C
3505   MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3506 
3507   Not Collective
3508 
3509   Input Parameters:
3510 + A     - a `MATSEQBAIJ` matrix
3511 - array - pointer to the data
3512 
3513   Level: intermediate
3514 
3515 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3516 @*/
3517 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[])
3518 {
3519   PetscFunctionBegin;
3520   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
3521   PetscFunctionReturn(PETSC_SUCCESS);
3522 }
3523 
3524 /*MC
3525    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3526    block sparse compressed row format.
3527 
3528    Options Database Keys:
3529 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()`
3530 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3531 
3532    Level: beginner
3533 
3534    Notes:
3535    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
3536    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
3537 
3538    Run with `-info` to see what version of the matrix-vector product is being used
3539 
3540 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()`
3541 M*/
3542 
3543 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3544 
3545 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3546 {
3547   PetscMPIInt  size;
3548   Mat_SeqBAIJ *b;
3549 
3550   PetscFunctionBegin;
3551   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
3552   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3553 
3554   PetscCall(PetscNew(&b));
3555   B->data   = (void *)b;
3556   B->ops[0] = MatOps_Values;
3557 
3558   b->row          = NULL;
3559   b->col          = NULL;
3560   b->icol         = NULL;
3561   b->reallocs     = 0;
3562   b->saved_values = NULL;
3563 
3564   b->roworiented        = PETSC_TRUE;
3565   b->nonew              = 0;
3566   b->diag               = NULL;
3567   B->spptr              = NULL;
3568   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3569   b->keepnonzeropattern = PETSC_FALSE;
3570 
3571   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
3572   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
3573   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
3574   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
3575   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
3576   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
3577   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
3578   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
3579   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3580   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
3581 #if defined(PETSC_HAVE_HYPRE)
3582   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
3583 #endif
3584   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
3585   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
3586   PetscFunctionReturn(PETSC_SUCCESS);
3587 }
3588 
3589 PETSC_INTERN PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3590 {
3591   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3592   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3593 
3594   PetscFunctionBegin;
3595   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
3596   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
3597 
3598   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3599     c->imax           = a->imax;
3600     c->ilen           = a->ilen;
3601     c->free_imax_ilen = PETSC_FALSE;
3602   } else {
3603     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3604     for (i = 0; i < mbs; i++) {
3605       c->imax[i] = a->imax[i];
3606       c->ilen[i] = a->ilen[i];
3607     }
3608     c->free_imax_ilen = PETSC_TRUE;
3609   }
3610 
3611   /* allocate the matrix space */
3612   if (mallocmatspace) {
3613     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3614       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3615       PetscCall(PetscArrayzero(c->a, bs2 * nz));
3616       c->free_a       = PETSC_TRUE;
3617       c->i            = a->i;
3618       c->j            = a->j;
3619       c->free_ij      = PETSC_FALSE;
3620       c->parent       = A;
3621       C->preallocated = PETSC_TRUE;
3622       C->assembled    = PETSC_TRUE;
3623 
3624       PetscCall(PetscObjectReference((PetscObject)A));
3625       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3626       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3627     } else {
3628       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3629       PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j));
3630       PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i));
3631       c->free_a  = PETSC_TRUE;
3632       c->free_ij = PETSC_TRUE;
3633 
3634       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3635       if (mbs > 0) {
3636         PetscCall(PetscArraycpy(c->j, a->j, nz));
3637         if (cpvalues == MAT_COPY_VALUES) {
3638           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
3639         } else {
3640           PetscCall(PetscArrayzero(c->a, bs2 * nz));
3641         }
3642       }
3643       C->preallocated = PETSC_TRUE;
3644       C->assembled    = PETSC_TRUE;
3645     }
3646   }
3647 
3648   c->roworiented = a->roworiented;
3649   c->nonew       = a->nonew;
3650 
3651   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
3652   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
3653 
3654   c->bs2 = a->bs2;
3655   c->mbs = a->mbs;
3656   c->nbs = a->nbs;
3657 
3658   if (a->diag) {
3659     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3660       c->diag      = a->diag;
3661       c->free_diag = PETSC_FALSE;
3662     } else {
3663       PetscCall(PetscMalloc1(mbs + 1, &c->diag));
3664       for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i];
3665       c->free_diag = PETSC_TRUE;
3666     }
3667   } else c->diag = NULL;
3668 
3669   c->nz         = a->nz;
3670   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3671   c->solve_work = NULL;
3672   c->mult_work  = NULL;
3673   c->sor_workt  = NULL;
3674   c->sor_work   = NULL;
3675 
3676   c->compressedrow.use   = a->compressedrow.use;
3677   c->compressedrow.nrows = a->compressedrow.nrows;
3678   if (a->compressedrow.use) {
3679     i = a->compressedrow.nrows;
3680     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
3681     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
3682     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
3683   } else {
3684     c->compressedrow.use    = PETSC_FALSE;
3685     c->compressedrow.i      = NULL;
3686     c->compressedrow.rindex = NULL;
3687   }
3688   c->nonzerorowcnt = a->nonzerorowcnt;
3689   C->nonzerostate  = A->nonzerostate;
3690 
3691   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
3692   PetscFunctionReturn(PETSC_SUCCESS);
3693 }
3694 
3695 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3696 {
3697   PetscFunctionBegin;
3698   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
3699   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
3700   PetscCall(MatSetType(*B, MATSEQBAIJ));
3701   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
3702   PetscFunctionReturn(PETSC_SUCCESS);
3703 }
3704 
3705 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3706 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3707 {
3708   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3709   PetscInt    *rowidxs, *colidxs;
3710   PetscScalar *matvals;
3711 
3712   PetscFunctionBegin;
3713   PetscCall(PetscViewerSetUp(viewer));
3714 
3715   /* read matrix header */
3716   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3717   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3718   M  = header[1];
3719   N  = header[2];
3720   nz = header[3];
3721   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3722   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3723   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3724 
3725   /* set block sizes from the viewer's .info file */
3726   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3727   /* set local and global sizes if not set already */
3728   if (mat->rmap->n < 0) mat->rmap->n = M;
3729   if (mat->cmap->n < 0) mat->cmap->n = N;
3730   if (mat->rmap->N < 0) mat->rmap->N = M;
3731   if (mat->cmap->N < 0) mat->cmap->N = N;
3732   PetscCall(PetscLayoutSetUp(mat->rmap));
3733   PetscCall(PetscLayoutSetUp(mat->cmap));
3734 
3735   /* check if the matrix sizes are correct */
3736   PetscCall(MatGetSize(mat, &rows, &cols));
3737   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3738   PetscCall(MatGetBlockSize(mat, &bs));
3739   PetscCall(MatGetLocalSize(mat, &m, &n));
3740   mbs = m / bs;
3741   nbs = n / bs;
3742 
3743   /* read in row lengths, column indices and nonzero values */
3744   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3745   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
3746   rowidxs[0] = 0;
3747   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3748   sum = rowidxs[m];
3749   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3750 
3751   /* read in column indices and nonzero values */
3752   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
3753   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
3754   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3755 
3756   {               /* preallocate matrix storage */
3757     PetscBT   bt; /* helper bit set to count nonzeros */
3758     PetscInt *nnz;
3759     PetscBool sbaij;
3760 
3761     PetscCall(PetscBTCreate(nbs, &bt));
3762     PetscCall(PetscCalloc1(mbs, &nnz));
3763     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3764     for (i = 0; i < mbs; i++) {
3765       PetscCall(PetscBTMemzero(nbs, bt));
3766       for (k = 0; k < bs; k++) {
3767         PetscInt row = bs * i + k;
3768         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3769           PetscInt col = colidxs[j];
3770           if (!sbaij || col >= row)
3771             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3772         }
3773       }
3774     }
3775     PetscCall(PetscBTDestroy(&bt));
3776     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
3777     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
3778     PetscCall(PetscFree(nnz));
3779   }
3780 
3781   /* store matrix values */
3782   for (i = 0; i < m; i++) {
3783     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
3784     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3785   }
3786 
3787   PetscCall(PetscFree(rowidxs));
3788   PetscCall(PetscFree2(colidxs, matvals));
3789   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3790   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3791   PetscFunctionReturn(PETSC_SUCCESS);
3792 }
3793 
3794 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3795 {
3796   PetscBool isbinary;
3797 
3798   PetscFunctionBegin;
3799   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3800   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3801   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
3802   PetscFunctionReturn(PETSC_SUCCESS);
3803 }
3804 
3805 /*@
3806   MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3807   compressed row) format.  For good matrix assembly performance the
3808   user should preallocate the matrix storage by setting the parameter `nz`
3809   (or the array `nnz`).
3810 
3811   Collective
3812 
3813   Input Parameters:
3814 + comm - MPI communicator, set to `PETSC_COMM_SELF`
3815 . bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3816          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3817 . m    - number of rows
3818 . n    - number of columns
3819 . nz   - number of nonzero blocks  per block row (same for all rows)
3820 - nnz  - array containing the number of nonzero blocks in the various block rows
3821          (possibly different for each block row) or `NULL`
3822 
3823   Output Parameter:
3824 . A - the matrix
3825 
3826   Options Database Keys:
3827 + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3828 - -mat_block_size - size of the blocks to use
3829 
3830   Level: intermediate
3831 
3832   Notes:
3833   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3834   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3835   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
3836 
3837   The number of rows and columns must be divisible by blocksize.
3838 
3839   If the `nnz` parameter is given then the `nz` parameter is ignored
3840 
3841   A nonzero block is any block that as 1 or more nonzeros in it
3842 
3843   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3844   storage.  That is, the stored row and column indices can begin at
3845   either one (as in Fortran) or zero.
3846 
3847   Specify the preallocated storage with either `nz` or `nnz` (not both).
3848   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3849   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3850   matrices.
3851 
3852 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3853 @*/
3854 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3855 {
3856   PetscFunctionBegin;
3857   PetscCall(MatCreate(comm, A));
3858   PetscCall(MatSetSizes(*A, m, n, m, n));
3859   PetscCall(MatSetType(*A, MATSEQBAIJ));
3860   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
3861   PetscFunctionReturn(PETSC_SUCCESS);
3862 }
3863 
3864 /*@
3865   MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3866   per row in the matrix. For good matrix assembly performance the
3867   user should preallocate the matrix storage by setting the parameter `nz`
3868   (or the array `nnz`).
3869 
3870   Collective
3871 
3872   Input Parameters:
3873 + B   - the matrix
3874 . bs  - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3875         blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3876 . nz  - number of block nonzeros per block row (same for all rows)
3877 - nnz - array containing the number of block nonzeros in the various block rows
3878         (possibly different for each block row) or `NULL`
3879 
3880   Options Database Keys:
3881 + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3882 - -mat_block_size - size of the blocks to use
3883 
3884   Level: intermediate
3885 
3886   Notes:
3887   If the `nnz` parameter is given then the `nz` parameter is ignored
3888 
3889   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3890   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3891   You can also run with the option `-info` and look for messages with the string
3892   malloc in them to see if additional memory allocation was needed.
3893 
3894   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3895   storage.  That is, the stored row and column indices can begin at
3896   either one (as in Fortran) or zero.
3897 
3898   Specify the preallocated storage with either `nz` or `nnz` (not both).
3899   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3900   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3901 
3902 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3903 @*/
3904 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3905 {
3906   PetscFunctionBegin;
3907   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3908   PetscValidType(B, 1);
3909   PetscValidLogicalCollectiveInt(B, bs, 2);
3910   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
3911   PetscFunctionReturn(PETSC_SUCCESS);
3912 }
3913 
3914 /*@C
3915   MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3916 
3917   Collective
3918 
3919   Input Parameters:
3920 + B  - the matrix
3921 . bs - the blocksize
3922 . i  - the indices into `j` for the start of each local row (indices start with zero)
3923 . j  - the column indices for each local row (indices start with zero) these must be sorted for each row
3924 - v  - optional values in the matrix, use `NULL` if not provided
3925 
3926   Level: advanced
3927 
3928   Notes:
3929   The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()`
3930 
3931   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
3932   may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
3933   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3934   `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3935   block column and the second index is over columns within a block.
3936 
3937   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3938 
3939 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3940 @*/
3941 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3942 {
3943   PetscFunctionBegin;
3944   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3945   PetscValidType(B, 1);
3946   PetscValidLogicalCollectiveInt(B, bs, 2);
3947   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
3948   PetscFunctionReturn(PETSC_SUCCESS);
3949 }
3950 
3951 /*@
3952   MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3953 
3954   Collective
3955 
3956   Input Parameters:
3957 + comm - must be an MPI communicator of size 1
3958 . bs   - size of block
3959 . m    - number of rows
3960 . n    - number of columns
3961 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3962 . j    - column indices
3963 - a    - matrix values
3964 
3965   Output Parameter:
3966 . mat - the matrix
3967 
3968   Level: advanced
3969 
3970   Notes:
3971   The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
3972   once the matrix is destroyed
3973 
3974   You cannot set new nonzero locations into this matrix, that will generate an error.
3975 
3976   The `i` and `j` indices are 0 based
3977 
3978   When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3979 
3980   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3981   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3982   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3983   with column-major ordering within blocks.
3984 
3985 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3986 @*/
3987 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3988 {
3989   Mat_SeqBAIJ *baij;
3990 
3991   PetscFunctionBegin;
3992   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
3993   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3994 
3995   PetscCall(MatCreate(comm, mat));
3996   PetscCall(MatSetSizes(*mat, m, n, m, n));
3997   PetscCall(MatSetType(*mat, MATSEQBAIJ));
3998   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3999   baij = (Mat_SeqBAIJ *)(*mat)->data;
4000   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
4001 
4002   baij->i = i;
4003   baij->j = j;
4004   baij->a = a;
4005 
4006   baij->nonew          = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
4007   baij->free_a         = PETSC_FALSE;
4008   baij->free_ij        = PETSC_FALSE;
4009   baij->free_imax_ilen = PETSC_TRUE;
4010 
4011   for (PetscInt ii = 0; ii < m; ii++) {
4012     const PetscInt row_len = i[ii + 1] - i[ii];
4013 
4014     baij->ilen[ii] = baij->imax[ii] = row_len;
4015     PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
4016   }
4017   if (PetscDefined(USE_DEBUG)) {
4018     for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
4019       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
4020       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
4021     }
4022   }
4023 
4024   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
4025   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
4026   PetscFunctionReturn(PETSC_SUCCESS);
4027 }
4028 
4029 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4030 {
4031   PetscFunctionBegin;
4032   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
4033   PetscFunctionReturn(PETSC_SUCCESS);
4034 }
4035