xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 2ff79c18c26c94ed8cb599682f680f231dca6444)
1 /*
2     Defines the basic matrix operations for the BAIJ (compressed row)
3   matrix storage format.
4 */
5 #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
6 #include <petscblaslapack.h>
7 #include <petsc/private/kernels/blockinvert.h>
8 #include <petsc/private/kernels/blockmatmult.h>
9 
10 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */
11 #define TYPE BAIJ
12 #define TYPE_BS
13 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
14 #undef TYPE_BS
15 #define TYPE_BS _BS
16 #define TYPE_BS_ON
17 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
18 #undef TYPE_BS
19 #include "../src/mat/impls/aij/seq/seqhashmat.h"
20 #undef TYPE
21 #undef TYPE_BS_ON
22 
23 #if defined(PETSC_HAVE_HYPRE)
24 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
25 #endif
26 
27 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
28 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
29 #endif
30 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
31 
32 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
33 {
34   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
35   PetscInt     m, n, ib, jb, bs = A->rmap->bs;
36   MatScalar   *a_val = a_aij->a;
37 
38   PetscFunctionBegin;
39   PetscCall(MatGetSize(A, &m, &n));
40   PetscCall(PetscArrayzero(reductions, n));
41   if (type == NORM_2) {
42     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
43       for (jb = 0; jb < bs; jb++) {
44         for (ib = 0; ib < bs; ib++) {
45           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
46           a_val++;
47         }
48       }
49     }
50   } else if (type == NORM_1) {
51     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
52       for (jb = 0; jb < bs; jb++) {
53         for (ib = 0; ib < bs; ib++) {
54           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
55           a_val++;
56         }
57       }
58     }
59   } else if (type == NORM_INFINITY) {
60     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
61       for (jb = 0; jb < bs; jb++) {
62         for (ib = 0; ib < bs; ib++) {
63           PetscInt col    = A->cmap->rstart + a_aij->j[i] * bs + jb;
64           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
65           a_val++;
66         }
67       }
68     }
69   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
70     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
71       for (jb = 0; jb < bs; jb++) {
72         for (ib = 0; ib < bs; ib++) {
73           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
74           a_val++;
75         }
76       }
77     }
78   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
79     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
80       for (jb = 0; jb < bs; jb++) {
81         for (ib = 0; ib < bs; ib++) {
82           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
83           a_val++;
84         }
85       }
86     }
87   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
88   if (type == NORM_2) {
89     for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
90   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
91     for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
92   }
93   PetscFunctionReturn(PETSC_SUCCESS);
94 }
95 
96 static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
97 {
98   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
99   PetscInt    *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
100   MatScalar   *v     = a->a, *odiag, *diag, work[25], *v_work;
101   PetscReal    shift = 0.0;
102   PetscBool    allowzeropivot, zeropivotdetected = PETSC_FALSE;
103 
104   PetscFunctionBegin;
105   allowzeropivot = PetscNot(A->erroriffailure);
106 
107   if (a->idiagvalid) {
108     if (values) *values = a->idiag;
109     PetscFunctionReturn(PETSC_SUCCESS);
110   }
111   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
112   diag_offset = a->diag;
113   if (!a->idiag) PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag));
114   diag = a->idiag;
115   if (values) *values = a->idiag;
116   /* factor and invert each block */
117   switch (bs) {
118   case 1:
119     for (i = 0; i < mbs; i++) {
120       odiag   = v + 1 * diag_offset[i];
121       diag[0] = odiag[0];
122 
123       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
124         PetscCheck(allowzeropivot, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
125         A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
126         A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
127         A->factorerror_zeropivot_row   = i;
128         PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
129       }
130 
131       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
132       diag += 1;
133     }
134     break;
135   case 2:
136     for (i = 0; i < mbs; i++) {
137       odiag   = v + 4 * diag_offset[i];
138       diag[0] = odiag[0];
139       diag[1] = odiag[1];
140       diag[2] = odiag[2];
141       diag[3] = odiag[3];
142       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
143       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
144       diag += 4;
145     }
146     break;
147   case 3:
148     for (i = 0; i < mbs; i++) {
149       odiag   = v + 9 * diag_offset[i];
150       diag[0] = odiag[0];
151       diag[1] = odiag[1];
152       diag[2] = odiag[2];
153       diag[3] = odiag[3];
154       diag[4] = odiag[4];
155       diag[5] = odiag[5];
156       diag[6] = odiag[6];
157       diag[7] = odiag[7];
158       diag[8] = odiag[8];
159       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
160       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
161       diag += 9;
162     }
163     break;
164   case 4:
165     for (i = 0; i < mbs; i++) {
166       odiag = v + 16 * diag_offset[i];
167       PetscCall(PetscArraycpy(diag, odiag, 16));
168       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
169       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
170       diag += 16;
171     }
172     break;
173   case 5:
174     for (i = 0; i < mbs; i++) {
175       odiag = v + 25 * diag_offset[i];
176       PetscCall(PetscArraycpy(diag, odiag, 25));
177       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
178       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
179       diag += 25;
180     }
181     break;
182   case 6:
183     for (i = 0; i < mbs; i++) {
184       odiag = v + 36 * diag_offset[i];
185       PetscCall(PetscArraycpy(diag, odiag, 36));
186       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
187       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
188       diag += 36;
189     }
190     break;
191   case 7:
192     for (i = 0; i < mbs; i++) {
193       odiag = v + 49 * diag_offset[i];
194       PetscCall(PetscArraycpy(diag, odiag, 49));
195       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
196       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
197       diag += 49;
198     }
199     break;
200   default:
201     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
202     for (i = 0; i < mbs; i++) {
203       odiag = v + bs2 * diag_offset[i];
204       PetscCall(PetscArraycpy(diag, odiag, bs2));
205       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
206       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
207       diag += bs2;
208     }
209     PetscCall(PetscFree2(v_work, v_pivots));
210   }
211   a->idiagvalid = PETSC_TRUE;
212   PetscFunctionReturn(PETSC_SUCCESS);
213 }
214 
215 static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
216 {
217   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
218   PetscScalar       *x, *work, *w, *workt, *t;
219   const MatScalar   *v, *aa = a->a, *idiag;
220   const PetscScalar *b, *xb;
221   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
222   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
223   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
224 
225   PetscFunctionBegin;
226   its = its * lits;
227   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
228   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
229   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
230   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
231   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
232 
233   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
234 
235   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
236   diag  = a->diag;
237   idiag = a->idiag;
238   k     = PetscMax(A->rmap->n, A->cmap->n);
239   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
240   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
241   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
242   work = a->mult_work;
243   t    = a->sor_workt;
244   w    = a->sor_work;
245 
246   PetscCall(VecGetArray(xx, &x));
247   PetscCall(VecGetArrayRead(bb, &b));
248 
249   if (flag & SOR_ZERO_INITIAL_GUESS) {
250     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
251       switch (bs) {
252       case 1:
253         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
254         t[0] = b[0];
255         i2   = 1;
256         idiag += 1;
257         for (i = 1; i < m; i++) {
258           v    = aa + ai[i];
259           vi   = aj + ai[i];
260           nz   = diag[i] - ai[i];
261           s[0] = b[i2];
262           for (j = 0; j < nz; j++) {
263             xw[0] = x[vi[j]];
264             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
265           }
266           t[i2] = s[0];
267           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
268           x[i2] = xw[0];
269           idiag += 1;
270           i2 += 1;
271         }
272         break;
273       case 2:
274         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
275         t[0] = b[0];
276         t[1] = b[1];
277         i2   = 2;
278         idiag += 4;
279         for (i = 1; i < m; i++) {
280           v    = aa + 4 * ai[i];
281           vi   = aj + ai[i];
282           nz   = diag[i] - ai[i];
283           s[0] = b[i2];
284           s[1] = b[i2 + 1];
285           for (j = 0; j < nz; j++) {
286             idx   = 2 * vi[j];
287             it    = 4 * j;
288             xw[0] = x[idx];
289             xw[1] = x[1 + idx];
290             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
291           }
292           t[i2]     = s[0];
293           t[i2 + 1] = s[1];
294           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
295           x[i2]     = xw[0];
296           x[i2 + 1] = xw[1];
297           idiag += 4;
298           i2 += 2;
299         }
300         break;
301       case 3:
302         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
303         t[0] = b[0];
304         t[1] = b[1];
305         t[2] = b[2];
306         i2   = 3;
307         idiag += 9;
308         for (i = 1; i < m; i++) {
309           v    = aa + 9 * ai[i];
310           vi   = aj + ai[i];
311           nz   = diag[i] - ai[i];
312           s[0] = b[i2];
313           s[1] = b[i2 + 1];
314           s[2] = b[i2 + 2];
315           while (nz--) {
316             idx   = 3 * (*vi++);
317             xw[0] = x[idx];
318             xw[1] = x[1 + idx];
319             xw[2] = x[2 + idx];
320             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
321             v += 9;
322           }
323           t[i2]     = s[0];
324           t[i2 + 1] = s[1];
325           t[i2 + 2] = s[2];
326           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
327           x[i2]     = xw[0];
328           x[i2 + 1] = xw[1];
329           x[i2 + 2] = xw[2];
330           idiag += 9;
331           i2 += 3;
332         }
333         break;
334       case 4:
335         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
336         t[0] = b[0];
337         t[1] = b[1];
338         t[2] = b[2];
339         t[3] = b[3];
340         i2   = 4;
341         idiag += 16;
342         for (i = 1; i < m; i++) {
343           v    = aa + 16 * ai[i];
344           vi   = aj + ai[i];
345           nz   = diag[i] - ai[i];
346           s[0] = b[i2];
347           s[1] = b[i2 + 1];
348           s[2] = b[i2 + 2];
349           s[3] = b[i2 + 3];
350           while (nz--) {
351             idx   = 4 * (*vi++);
352             xw[0] = x[idx];
353             xw[1] = x[1 + idx];
354             xw[2] = x[2 + idx];
355             xw[3] = x[3 + idx];
356             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
357             v += 16;
358           }
359           t[i2]     = s[0];
360           t[i2 + 1] = s[1];
361           t[i2 + 2] = s[2];
362           t[i2 + 3] = s[3];
363           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
364           x[i2]     = xw[0];
365           x[i2 + 1] = xw[1];
366           x[i2 + 2] = xw[2];
367           x[i2 + 3] = xw[3];
368           idiag += 16;
369           i2 += 4;
370         }
371         break;
372       case 5:
373         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
374         t[0] = b[0];
375         t[1] = b[1];
376         t[2] = b[2];
377         t[3] = b[3];
378         t[4] = b[4];
379         i2   = 5;
380         idiag += 25;
381         for (i = 1; i < m; i++) {
382           v    = aa + 25 * ai[i];
383           vi   = aj + ai[i];
384           nz   = diag[i] - ai[i];
385           s[0] = b[i2];
386           s[1] = b[i2 + 1];
387           s[2] = b[i2 + 2];
388           s[3] = b[i2 + 3];
389           s[4] = b[i2 + 4];
390           while (nz--) {
391             idx   = 5 * (*vi++);
392             xw[0] = x[idx];
393             xw[1] = x[1 + idx];
394             xw[2] = x[2 + idx];
395             xw[3] = x[3 + idx];
396             xw[4] = x[4 + idx];
397             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
398             v += 25;
399           }
400           t[i2]     = s[0];
401           t[i2 + 1] = s[1];
402           t[i2 + 2] = s[2];
403           t[i2 + 3] = s[3];
404           t[i2 + 4] = s[4];
405           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
406           x[i2]     = xw[0];
407           x[i2 + 1] = xw[1];
408           x[i2 + 2] = xw[2];
409           x[i2 + 3] = xw[3];
410           x[i2 + 4] = xw[4];
411           idiag += 25;
412           i2 += 5;
413         }
414         break;
415       case 6:
416         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
417         t[0] = b[0];
418         t[1] = b[1];
419         t[2] = b[2];
420         t[3] = b[3];
421         t[4] = b[4];
422         t[5] = b[5];
423         i2   = 6;
424         idiag += 36;
425         for (i = 1; i < m; i++) {
426           v    = aa + 36 * ai[i];
427           vi   = aj + ai[i];
428           nz   = diag[i] - ai[i];
429           s[0] = b[i2];
430           s[1] = b[i2 + 1];
431           s[2] = b[i2 + 2];
432           s[3] = b[i2 + 3];
433           s[4] = b[i2 + 4];
434           s[5] = b[i2 + 5];
435           while (nz--) {
436             idx   = 6 * (*vi++);
437             xw[0] = x[idx];
438             xw[1] = x[1 + idx];
439             xw[2] = x[2 + idx];
440             xw[3] = x[3 + idx];
441             xw[4] = x[4 + idx];
442             xw[5] = x[5 + idx];
443             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
444             v += 36;
445           }
446           t[i2]     = s[0];
447           t[i2 + 1] = s[1];
448           t[i2 + 2] = s[2];
449           t[i2 + 3] = s[3];
450           t[i2 + 4] = s[4];
451           t[i2 + 5] = s[5];
452           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
453           x[i2]     = xw[0];
454           x[i2 + 1] = xw[1];
455           x[i2 + 2] = xw[2];
456           x[i2 + 3] = xw[3];
457           x[i2 + 4] = xw[4];
458           x[i2 + 5] = xw[5];
459           idiag += 36;
460           i2 += 6;
461         }
462         break;
463       case 7:
464         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
465         t[0] = b[0];
466         t[1] = b[1];
467         t[2] = b[2];
468         t[3] = b[3];
469         t[4] = b[4];
470         t[5] = b[5];
471         t[6] = b[6];
472         i2   = 7;
473         idiag += 49;
474         for (i = 1; i < m; i++) {
475           v    = aa + 49 * ai[i];
476           vi   = aj + ai[i];
477           nz   = diag[i] - ai[i];
478           s[0] = b[i2];
479           s[1] = b[i2 + 1];
480           s[2] = b[i2 + 2];
481           s[3] = b[i2 + 3];
482           s[4] = b[i2 + 4];
483           s[5] = b[i2 + 5];
484           s[6] = b[i2 + 6];
485           while (nz--) {
486             idx   = 7 * (*vi++);
487             xw[0] = x[idx];
488             xw[1] = x[1 + idx];
489             xw[2] = x[2 + idx];
490             xw[3] = x[3 + idx];
491             xw[4] = x[4 + idx];
492             xw[5] = x[5 + idx];
493             xw[6] = x[6 + idx];
494             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
495             v += 49;
496           }
497           t[i2]     = s[0];
498           t[i2 + 1] = s[1];
499           t[i2 + 2] = s[2];
500           t[i2 + 3] = s[3];
501           t[i2 + 4] = s[4];
502           t[i2 + 5] = s[5];
503           t[i2 + 6] = s[6];
504           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
505           x[i2]     = xw[0];
506           x[i2 + 1] = xw[1];
507           x[i2 + 2] = xw[2];
508           x[i2 + 3] = xw[3];
509           x[i2 + 4] = xw[4];
510           x[i2 + 5] = xw[5];
511           x[i2 + 6] = xw[6];
512           idiag += 49;
513           i2 += 7;
514         }
515         break;
516       default:
517         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
518         PetscCall(PetscArraycpy(t, b, bs));
519         i2 = bs;
520         idiag += bs2;
521         for (i = 1; i < m; i++) {
522           v  = aa + bs2 * ai[i];
523           vi = aj + ai[i];
524           nz = diag[i] - ai[i];
525 
526           PetscCall(PetscArraycpy(w, b + i2, bs));
527           /* copy all rows of x that are needed into contiguous space */
528           workt = work;
529           for (j = 0; j < nz; j++) {
530             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
531             workt += bs;
532           }
533           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
534           PetscCall(PetscArraycpy(t + i2, w, bs));
535           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
536 
537           idiag += bs2;
538           i2 += bs;
539         }
540         break;
541       }
542       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
543       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
544       xb = t;
545     } else xb = b;
546     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
547       idiag = a->idiag + bs2 * (a->mbs - 1);
548       i2    = bs * (m - 1);
549       switch (bs) {
550       case 1:
551         s[0] = xb[i2];
552         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
553         x[i2] = xw[0];
554         i2 -= 1;
555         for (i = m - 2; i >= 0; i--) {
556           v    = aa + (diag[i] + 1);
557           vi   = aj + diag[i] + 1;
558           nz   = ai[i + 1] - diag[i] - 1;
559           s[0] = xb[i2];
560           for (j = 0; j < nz; j++) {
561             xw[0] = x[vi[j]];
562             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
563           }
564           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
565           x[i2] = xw[0];
566           idiag -= 1;
567           i2 -= 1;
568         }
569         break;
570       case 2:
571         s[0] = xb[i2];
572         s[1] = xb[i2 + 1];
573         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
574         x[i2]     = xw[0];
575         x[i2 + 1] = xw[1];
576         i2 -= 2;
577         idiag -= 4;
578         for (i = m - 2; i >= 0; i--) {
579           v    = aa + 4 * (diag[i] + 1);
580           vi   = aj + diag[i] + 1;
581           nz   = ai[i + 1] - diag[i] - 1;
582           s[0] = xb[i2];
583           s[1] = xb[i2 + 1];
584           for (j = 0; j < nz; j++) {
585             idx   = 2 * vi[j];
586             it    = 4 * j;
587             xw[0] = x[idx];
588             xw[1] = x[1 + idx];
589             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
590           }
591           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
592           x[i2]     = xw[0];
593           x[i2 + 1] = xw[1];
594           idiag -= 4;
595           i2 -= 2;
596         }
597         break;
598       case 3:
599         s[0] = xb[i2];
600         s[1] = xb[i2 + 1];
601         s[2] = xb[i2 + 2];
602         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
603         x[i2]     = xw[0];
604         x[i2 + 1] = xw[1];
605         x[i2 + 2] = xw[2];
606         i2 -= 3;
607         idiag -= 9;
608         for (i = m - 2; i >= 0; i--) {
609           v    = aa + 9 * (diag[i] + 1);
610           vi   = aj + diag[i] + 1;
611           nz   = ai[i + 1] - diag[i] - 1;
612           s[0] = xb[i2];
613           s[1] = xb[i2 + 1];
614           s[2] = xb[i2 + 2];
615           while (nz--) {
616             idx   = 3 * (*vi++);
617             xw[0] = x[idx];
618             xw[1] = x[1 + idx];
619             xw[2] = x[2 + idx];
620             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
621             v += 9;
622           }
623           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
624           x[i2]     = xw[0];
625           x[i2 + 1] = xw[1];
626           x[i2 + 2] = xw[2];
627           idiag -= 9;
628           i2 -= 3;
629         }
630         break;
631       case 4:
632         s[0] = xb[i2];
633         s[1] = xb[i2 + 1];
634         s[2] = xb[i2 + 2];
635         s[3] = xb[i2 + 3];
636         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
637         x[i2]     = xw[0];
638         x[i2 + 1] = xw[1];
639         x[i2 + 2] = xw[2];
640         x[i2 + 3] = xw[3];
641         i2 -= 4;
642         idiag -= 16;
643         for (i = m - 2; i >= 0; i--) {
644           v    = aa + 16 * (diag[i] + 1);
645           vi   = aj + diag[i] + 1;
646           nz   = ai[i + 1] - diag[i] - 1;
647           s[0] = xb[i2];
648           s[1] = xb[i2 + 1];
649           s[2] = xb[i2 + 2];
650           s[3] = xb[i2 + 3];
651           while (nz--) {
652             idx   = 4 * (*vi++);
653             xw[0] = x[idx];
654             xw[1] = x[1 + idx];
655             xw[2] = x[2 + idx];
656             xw[3] = x[3 + idx];
657             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
658             v += 16;
659           }
660           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
661           x[i2]     = xw[0];
662           x[i2 + 1] = xw[1];
663           x[i2 + 2] = xw[2];
664           x[i2 + 3] = xw[3];
665           idiag -= 16;
666           i2 -= 4;
667         }
668         break;
669       case 5:
670         s[0] = xb[i2];
671         s[1] = xb[i2 + 1];
672         s[2] = xb[i2 + 2];
673         s[3] = xb[i2 + 3];
674         s[4] = xb[i2 + 4];
675         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
676         x[i2]     = xw[0];
677         x[i2 + 1] = xw[1];
678         x[i2 + 2] = xw[2];
679         x[i2 + 3] = xw[3];
680         x[i2 + 4] = xw[4];
681         i2 -= 5;
682         idiag -= 25;
683         for (i = m - 2; i >= 0; i--) {
684           v    = aa + 25 * (diag[i] + 1);
685           vi   = aj + diag[i] + 1;
686           nz   = ai[i + 1] - diag[i] - 1;
687           s[0] = xb[i2];
688           s[1] = xb[i2 + 1];
689           s[2] = xb[i2 + 2];
690           s[3] = xb[i2 + 3];
691           s[4] = xb[i2 + 4];
692           while (nz--) {
693             idx   = 5 * (*vi++);
694             xw[0] = x[idx];
695             xw[1] = x[1 + idx];
696             xw[2] = x[2 + idx];
697             xw[3] = x[3 + idx];
698             xw[4] = x[4 + idx];
699             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
700             v += 25;
701           }
702           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
703           x[i2]     = xw[0];
704           x[i2 + 1] = xw[1];
705           x[i2 + 2] = xw[2];
706           x[i2 + 3] = xw[3];
707           x[i2 + 4] = xw[4];
708           idiag -= 25;
709           i2 -= 5;
710         }
711         break;
712       case 6:
713         s[0] = xb[i2];
714         s[1] = xb[i2 + 1];
715         s[2] = xb[i2 + 2];
716         s[3] = xb[i2 + 3];
717         s[4] = xb[i2 + 4];
718         s[5] = xb[i2 + 5];
719         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
720         x[i2]     = xw[0];
721         x[i2 + 1] = xw[1];
722         x[i2 + 2] = xw[2];
723         x[i2 + 3] = xw[3];
724         x[i2 + 4] = xw[4];
725         x[i2 + 5] = xw[5];
726         i2 -= 6;
727         idiag -= 36;
728         for (i = m - 2; i >= 0; i--) {
729           v    = aa + 36 * (diag[i] + 1);
730           vi   = aj + diag[i] + 1;
731           nz   = ai[i + 1] - diag[i] - 1;
732           s[0] = xb[i2];
733           s[1] = xb[i2 + 1];
734           s[2] = xb[i2 + 2];
735           s[3] = xb[i2 + 3];
736           s[4] = xb[i2 + 4];
737           s[5] = xb[i2 + 5];
738           while (nz--) {
739             idx   = 6 * (*vi++);
740             xw[0] = x[idx];
741             xw[1] = x[1 + idx];
742             xw[2] = x[2 + idx];
743             xw[3] = x[3 + idx];
744             xw[4] = x[4 + idx];
745             xw[5] = x[5 + idx];
746             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
747             v += 36;
748           }
749           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
750           x[i2]     = xw[0];
751           x[i2 + 1] = xw[1];
752           x[i2 + 2] = xw[2];
753           x[i2 + 3] = xw[3];
754           x[i2 + 4] = xw[4];
755           x[i2 + 5] = xw[5];
756           idiag -= 36;
757           i2 -= 6;
758         }
759         break;
760       case 7:
761         s[0] = xb[i2];
762         s[1] = xb[i2 + 1];
763         s[2] = xb[i2 + 2];
764         s[3] = xb[i2 + 3];
765         s[4] = xb[i2 + 4];
766         s[5] = xb[i2 + 5];
767         s[6] = xb[i2 + 6];
768         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
769         x[i2]     = xw[0];
770         x[i2 + 1] = xw[1];
771         x[i2 + 2] = xw[2];
772         x[i2 + 3] = xw[3];
773         x[i2 + 4] = xw[4];
774         x[i2 + 5] = xw[5];
775         x[i2 + 6] = xw[6];
776         i2 -= 7;
777         idiag -= 49;
778         for (i = m - 2; i >= 0; i--) {
779           v    = aa + 49 * (diag[i] + 1);
780           vi   = aj + diag[i] + 1;
781           nz   = ai[i + 1] - diag[i] - 1;
782           s[0] = xb[i2];
783           s[1] = xb[i2 + 1];
784           s[2] = xb[i2 + 2];
785           s[3] = xb[i2 + 3];
786           s[4] = xb[i2 + 4];
787           s[5] = xb[i2 + 5];
788           s[6] = xb[i2 + 6];
789           while (nz--) {
790             idx   = 7 * (*vi++);
791             xw[0] = x[idx];
792             xw[1] = x[1 + idx];
793             xw[2] = x[2 + idx];
794             xw[3] = x[3 + idx];
795             xw[4] = x[4 + idx];
796             xw[5] = x[5 + idx];
797             xw[6] = x[6 + idx];
798             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
799             v += 49;
800           }
801           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
802           x[i2]     = xw[0];
803           x[i2 + 1] = xw[1];
804           x[i2 + 2] = xw[2];
805           x[i2 + 3] = xw[3];
806           x[i2 + 4] = xw[4];
807           x[i2 + 5] = xw[5];
808           x[i2 + 6] = xw[6];
809           idiag -= 49;
810           i2 -= 7;
811         }
812         break;
813       default:
814         PetscCall(PetscArraycpy(w, xb + i2, bs));
815         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
816         i2 -= bs;
817         idiag -= bs2;
818         for (i = m - 2; i >= 0; i--) {
819           v  = aa + bs2 * (diag[i] + 1);
820           vi = aj + diag[i] + 1;
821           nz = ai[i + 1] - diag[i] - 1;
822 
823           PetscCall(PetscArraycpy(w, xb + i2, bs));
824           /* copy all rows of x that are needed into contiguous space */
825           workt = work;
826           for (j = 0; j < nz; j++) {
827             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
828             workt += bs;
829           }
830           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
831           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
832 
833           idiag -= bs2;
834           i2 -= bs;
835         }
836         break;
837       }
838       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
839     }
840     its--;
841   }
842   while (its--) {
843     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
844       idiag = a->idiag;
845       i2    = 0;
846       switch (bs) {
847       case 1:
848         for (i = 0; i < m; i++) {
849           v    = aa + ai[i];
850           vi   = aj + ai[i];
851           nz   = ai[i + 1] - ai[i];
852           s[0] = b[i2];
853           for (j = 0; j < nz; j++) {
854             xw[0] = x[vi[j]];
855             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
856           }
857           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
858           x[i2] += xw[0];
859           idiag += 1;
860           i2 += 1;
861         }
862         break;
863       case 2:
864         for (i = 0; i < m; i++) {
865           v    = aa + 4 * ai[i];
866           vi   = aj + ai[i];
867           nz   = ai[i + 1] - ai[i];
868           s[0] = b[i2];
869           s[1] = b[i2 + 1];
870           for (j = 0; j < nz; j++) {
871             idx   = 2 * vi[j];
872             it    = 4 * j;
873             xw[0] = x[idx];
874             xw[1] = x[1 + idx];
875             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
876           }
877           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
878           x[i2] += xw[0];
879           x[i2 + 1] += xw[1];
880           idiag += 4;
881           i2 += 2;
882         }
883         break;
884       case 3:
885         for (i = 0; i < m; i++) {
886           v    = aa + 9 * ai[i];
887           vi   = aj + ai[i];
888           nz   = ai[i + 1] - ai[i];
889           s[0] = b[i2];
890           s[1] = b[i2 + 1];
891           s[2] = b[i2 + 2];
892           while (nz--) {
893             idx   = 3 * (*vi++);
894             xw[0] = x[idx];
895             xw[1] = x[1 + idx];
896             xw[2] = x[2 + idx];
897             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
898             v += 9;
899           }
900           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
901           x[i2] += xw[0];
902           x[i2 + 1] += xw[1];
903           x[i2 + 2] += xw[2];
904           idiag += 9;
905           i2 += 3;
906         }
907         break;
908       case 4:
909         for (i = 0; i < m; i++) {
910           v    = aa + 16 * ai[i];
911           vi   = aj + ai[i];
912           nz   = ai[i + 1] - ai[i];
913           s[0] = b[i2];
914           s[1] = b[i2 + 1];
915           s[2] = b[i2 + 2];
916           s[3] = b[i2 + 3];
917           while (nz--) {
918             idx   = 4 * (*vi++);
919             xw[0] = x[idx];
920             xw[1] = x[1 + idx];
921             xw[2] = x[2 + idx];
922             xw[3] = x[3 + idx];
923             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
924             v += 16;
925           }
926           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
927           x[i2] += xw[0];
928           x[i2 + 1] += xw[1];
929           x[i2 + 2] += xw[2];
930           x[i2 + 3] += xw[3];
931           idiag += 16;
932           i2 += 4;
933         }
934         break;
935       case 5:
936         for (i = 0; i < m; i++) {
937           v    = aa + 25 * ai[i];
938           vi   = aj + ai[i];
939           nz   = ai[i + 1] - ai[i];
940           s[0] = b[i2];
941           s[1] = b[i2 + 1];
942           s[2] = b[i2 + 2];
943           s[3] = b[i2 + 3];
944           s[4] = b[i2 + 4];
945           while (nz--) {
946             idx   = 5 * (*vi++);
947             xw[0] = x[idx];
948             xw[1] = x[1 + idx];
949             xw[2] = x[2 + idx];
950             xw[3] = x[3 + idx];
951             xw[4] = x[4 + idx];
952             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
953             v += 25;
954           }
955           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
956           x[i2] += xw[0];
957           x[i2 + 1] += xw[1];
958           x[i2 + 2] += xw[2];
959           x[i2 + 3] += xw[3];
960           x[i2 + 4] += xw[4];
961           idiag += 25;
962           i2 += 5;
963         }
964         break;
965       case 6:
966         for (i = 0; i < m; i++) {
967           v    = aa + 36 * ai[i];
968           vi   = aj + ai[i];
969           nz   = ai[i + 1] - ai[i];
970           s[0] = b[i2];
971           s[1] = b[i2 + 1];
972           s[2] = b[i2 + 2];
973           s[3] = b[i2 + 3];
974           s[4] = b[i2 + 4];
975           s[5] = b[i2 + 5];
976           while (nz--) {
977             idx   = 6 * (*vi++);
978             xw[0] = x[idx];
979             xw[1] = x[1 + idx];
980             xw[2] = x[2 + idx];
981             xw[3] = x[3 + idx];
982             xw[4] = x[4 + idx];
983             xw[5] = x[5 + idx];
984             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
985             v += 36;
986           }
987           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
988           x[i2] += xw[0];
989           x[i2 + 1] += xw[1];
990           x[i2 + 2] += xw[2];
991           x[i2 + 3] += xw[3];
992           x[i2 + 4] += xw[4];
993           x[i2 + 5] += xw[5];
994           idiag += 36;
995           i2 += 6;
996         }
997         break;
998       case 7:
999         for (i = 0; i < m; i++) {
1000           v    = aa + 49 * ai[i];
1001           vi   = aj + ai[i];
1002           nz   = ai[i + 1] - ai[i];
1003           s[0] = b[i2];
1004           s[1] = b[i2 + 1];
1005           s[2] = b[i2 + 2];
1006           s[3] = b[i2 + 3];
1007           s[4] = b[i2 + 4];
1008           s[5] = b[i2 + 5];
1009           s[6] = b[i2 + 6];
1010           while (nz--) {
1011             idx   = 7 * (*vi++);
1012             xw[0] = x[idx];
1013             xw[1] = x[1 + idx];
1014             xw[2] = x[2 + idx];
1015             xw[3] = x[3 + idx];
1016             xw[4] = x[4 + idx];
1017             xw[5] = x[5 + idx];
1018             xw[6] = x[6 + idx];
1019             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1020             v += 49;
1021           }
1022           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1023           x[i2] += xw[0];
1024           x[i2 + 1] += xw[1];
1025           x[i2 + 2] += xw[2];
1026           x[i2 + 3] += xw[3];
1027           x[i2 + 4] += xw[4];
1028           x[i2 + 5] += xw[5];
1029           x[i2 + 6] += xw[6];
1030           idiag += 49;
1031           i2 += 7;
1032         }
1033         break;
1034       default:
1035         for (i = 0; i < m; i++) {
1036           v  = aa + bs2 * ai[i];
1037           vi = aj + ai[i];
1038           nz = ai[i + 1] - ai[i];
1039 
1040           PetscCall(PetscArraycpy(w, b + i2, bs));
1041           /* copy all rows of x that are needed into contiguous space */
1042           workt = work;
1043           for (j = 0; j < nz; j++) {
1044             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1045             workt += bs;
1046           }
1047           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1048           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1049 
1050           idiag += bs2;
1051           i2 += bs;
1052         }
1053         break;
1054       }
1055       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1056     }
1057     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1058       idiag = a->idiag + bs2 * (a->mbs - 1);
1059       i2    = bs * (m - 1);
1060       switch (bs) {
1061       case 1:
1062         for (i = m - 1; i >= 0; i--) {
1063           v    = aa + ai[i];
1064           vi   = aj + ai[i];
1065           nz   = ai[i + 1] - ai[i];
1066           s[0] = b[i2];
1067           for (j = 0; j < nz; j++) {
1068             xw[0] = x[vi[j]];
1069             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1070           }
1071           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1072           x[i2] += xw[0];
1073           idiag -= 1;
1074           i2 -= 1;
1075         }
1076         break;
1077       case 2:
1078         for (i = m - 1; i >= 0; i--) {
1079           v    = aa + 4 * ai[i];
1080           vi   = aj + ai[i];
1081           nz   = ai[i + 1] - ai[i];
1082           s[0] = b[i2];
1083           s[1] = b[i2 + 1];
1084           for (j = 0; j < nz; j++) {
1085             idx   = 2 * vi[j];
1086             it    = 4 * j;
1087             xw[0] = x[idx];
1088             xw[1] = x[1 + idx];
1089             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1090           }
1091           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
1092           x[i2] += xw[0];
1093           x[i2 + 1] += xw[1];
1094           idiag -= 4;
1095           i2 -= 2;
1096         }
1097         break;
1098       case 3:
1099         for (i = m - 1; i >= 0; i--) {
1100           v    = aa + 9 * ai[i];
1101           vi   = aj + ai[i];
1102           nz   = ai[i + 1] - ai[i];
1103           s[0] = b[i2];
1104           s[1] = b[i2 + 1];
1105           s[2] = b[i2 + 2];
1106           while (nz--) {
1107             idx   = 3 * (*vi++);
1108             xw[0] = x[idx];
1109             xw[1] = x[1 + idx];
1110             xw[2] = x[2 + idx];
1111             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1112             v += 9;
1113           }
1114           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
1115           x[i2] += xw[0];
1116           x[i2 + 1] += xw[1];
1117           x[i2 + 2] += xw[2];
1118           idiag -= 9;
1119           i2 -= 3;
1120         }
1121         break;
1122       case 4:
1123         for (i = m - 1; i >= 0; i--) {
1124           v    = aa + 16 * ai[i];
1125           vi   = aj + ai[i];
1126           nz   = ai[i + 1] - ai[i];
1127           s[0] = b[i2];
1128           s[1] = b[i2 + 1];
1129           s[2] = b[i2 + 2];
1130           s[3] = b[i2 + 3];
1131           while (nz--) {
1132             idx   = 4 * (*vi++);
1133             xw[0] = x[idx];
1134             xw[1] = x[1 + idx];
1135             xw[2] = x[2 + idx];
1136             xw[3] = x[3 + idx];
1137             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1138             v += 16;
1139           }
1140           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
1141           x[i2] += xw[0];
1142           x[i2 + 1] += xw[1];
1143           x[i2 + 2] += xw[2];
1144           x[i2 + 3] += xw[3];
1145           idiag -= 16;
1146           i2 -= 4;
1147         }
1148         break;
1149       case 5:
1150         for (i = m - 1; i >= 0; i--) {
1151           v    = aa + 25 * ai[i];
1152           vi   = aj + ai[i];
1153           nz   = ai[i + 1] - ai[i];
1154           s[0] = b[i2];
1155           s[1] = b[i2 + 1];
1156           s[2] = b[i2 + 2];
1157           s[3] = b[i2 + 3];
1158           s[4] = b[i2 + 4];
1159           while (nz--) {
1160             idx   = 5 * (*vi++);
1161             xw[0] = x[idx];
1162             xw[1] = x[1 + idx];
1163             xw[2] = x[2 + idx];
1164             xw[3] = x[3 + idx];
1165             xw[4] = x[4 + idx];
1166             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1167             v += 25;
1168           }
1169           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
1170           x[i2] += xw[0];
1171           x[i2 + 1] += xw[1];
1172           x[i2 + 2] += xw[2];
1173           x[i2 + 3] += xw[3];
1174           x[i2 + 4] += xw[4];
1175           idiag -= 25;
1176           i2 -= 5;
1177         }
1178         break;
1179       case 6:
1180         for (i = m - 1; i >= 0; i--) {
1181           v    = aa + 36 * ai[i];
1182           vi   = aj + ai[i];
1183           nz   = ai[i + 1] - ai[i];
1184           s[0] = b[i2];
1185           s[1] = b[i2 + 1];
1186           s[2] = b[i2 + 2];
1187           s[3] = b[i2 + 3];
1188           s[4] = b[i2 + 4];
1189           s[5] = b[i2 + 5];
1190           while (nz--) {
1191             idx   = 6 * (*vi++);
1192             xw[0] = x[idx];
1193             xw[1] = x[1 + idx];
1194             xw[2] = x[2 + idx];
1195             xw[3] = x[3 + idx];
1196             xw[4] = x[4 + idx];
1197             xw[5] = x[5 + idx];
1198             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1199             v += 36;
1200           }
1201           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
1202           x[i2] += xw[0];
1203           x[i2 + 1] += xw[1];
1204           x[i2 + 2] += xw[2];
1205           x[i2 + 3] += xw[3];
1206           x[i2 + 4] += xw[4];
1207           x[i2 + 5] += xw[5];
1208           idiag -= 36;
1209           i2 -= 6;
1210         }
1211         break;
1212       case 7:
1213         for (i = m - 1; i >= 0; i--) {
1214           v    = aa + 49 * ai[i];
1215           vi   = aj + ai[i];
1216           nz   = ai[i + 1] - ai[i];
1217           s[0] = b[i2];
1218           s[1] = b[i2 + 1];
1219           s[2] = b[i2 + 2];
1220           s[3] = b[i2 + 3];
1221           s[4] = b[i2 + 4];
1222           s[5] = b[i2 + 5];
1223           s[6] = b[i2 + 6];
1224           while (nz--) {
1225             idx   = 7 * (*vi++);
1226             xw[0] = x[idx];
1227             xw[1] = x[1 + idx];
1228             xw[2] = x[2 + idx];
1229             xw[3] = x[3 + idx];
1230             xw[4] = x[4 + idx];
1231             xw[5] = x[5 + idx];
1232             xw[6] = x[6 + idx];
1233             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1234             v += 49;
1235           }
1236           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1237           x[i2] += xw[0];
1238           x[i2 + 1] += xw[1];
1239           x[i2 + 2] += xw[2];
1240           x[i2 + 3] += xw[3];
1241           x[i2 + 4] += xw[4];
1242           x[i2 + 5] += xw[5];
1243           x[i2 + 6] += xw[6];
1244           idiag -= 49;
1245           i2 -= 7;
1246         }
1247         break;
1248       default:
1249         for (i = m - 1; i >= 0; i--) {
1250           v  = aa + bs2 * ai[i];
1251           vi = aj + ai[i];
1252           nz = ai[i + 1] - ai[i];
1253 
1254           PetscCall(PetscArraycpy(w, b + i2, bs));
1255           /* copy all rows of x that are needed into contiguous space */
1256           workt = work;
1257           for (j = 0; j < nz; j++) {
1258             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1259             workt += bs;
1260           }
1261           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1262           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1263 
1264           idiag -= bs2;
1265           i2 -= bs;
1266         }
1267         break;
1268       }
1269       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1270     }
1271   }
1272   PetscCall(VecRestoreArray(xx, &x));
1273   PetscCall(VecRestoreArrayRead(bb, &b));
1274   PetscFunctionReturn(PETSC_SUCCESS);
1275 }
1276 
1277 /*
1278     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1279 */
1280 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1281   #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1282 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1283   #define matsetvaluesblocked4_ matsetvaluesblocked4
1284 #endif
1285 
1286 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1287 {
1288   Mat                A = *AA;
1289   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1290   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1291   PetscInt          *ai = a->i, *ailen = a->ilen;
1292   PetscInt          *aj = a->j, stepval, lastcol = -1;
1293   const PetscScalar *value = v;
1294   MatScalar         *ap, *aa = a->a, *bap;
1295 
1296   PetscFunctionBegin;
1297   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1298   stepval = (n - 1) * 4;
1299   for (k = 0; k < m; k++) { /* loop over added rows */
1300     row  = im[k];
1301     rp   = aj + ai[row];
1302     ap   = aa + 16 * ai[row];
1303     nrow = ailen[row];
1304     low  = 0;
1305     high = nrow;
1306     for (l = 0; l < n; l++) { /* loop over added columns */
1307       col = in[l];
1308       if (col <= lastcol) low = 0;
1309       else high = nrow;
1310       lastcol = col;
1311       value   = v + k * (stepval + 4 + l) * 4;
1312       while (high - low > 7) {
1313         t = (low + high) / 2;
1314         if (rp[t] > col) high = t;
1315         else low = t;
1316       }
1317       for (i = low; i < high; i++) {
1318         if (rp[i] > col) break;
1319         if (rp[i] == col) {
1320           bap = ap + 16 * i;
1321           for (ii = 0; ii < 4; ii++, value += stepval) {
1322             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1323           }
1324           goto noinsert2;
1325         }
1326       }
1327       N = nrow++ - 1;
1328       high++; /* added new column index thus must search to one higher than before */
1329       /* shift up all the later entries in this row */
1330       for (ii = N; ii >= i; ii--) {
1331         rp[ii + 1] = rp[ii];
1332         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1333       }
1334       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1335       rp[i] = col;
1336       bap   = ap + 16 * i;
1337       for (ii = 0; ii < 4; ii++, value += stepval) {
1338         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1339       }
1340     noinsert2:;
1341       low = i;
1342     }
1343     ailen[row] = nrow;
1344   }
1345   PetscFunctionReturnVoid();
1346 }
1347 
1348 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1349   #define matsetvalues4_ MATSETVALUES4
1350 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1351   #define matsetvalues4_ matsetvalues4
1352 #endif
1353 
1354 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1355 {
1356   Mat          A = *AA;
1357   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1358   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1359   PetscInt    *ai = a->i, *ailen = a->ilen;
1360   PetscInt    *aj = a->j, brow, bcol;
1361   PetscInt     ridx, cidx, lastcol = -1;
1362   MatScalar   *ap, value, *aa      = a->a, *bap;
1363 
1364   PetscFunctionBegin;
1365   for (k = 0; k < m; k++) { /* loop over added rows */
1366     row  = im[k];
1367     brow = row / 4;
1368     rp   = aj + ai[brow];
1369     ap   = aa + 16 * ai[brow];
1370     nrow = ailen[brow];
1371     low  = 0;
1372     high = nrow;
1373     for (l = 0; l < n; l++) { /* loop over added columns */
1374       col   = in[l];
1375       bcol  = col / 4;
1376       ridx  = row % 4;
1377       cidx  = col % 4;
1378       value = v[l + k * n];
1379       if (col <= lastcol) low = 0;
1380       else high = nrow;
1381       lastcol = col;
1382       while (high - low > 7) {
1383         t = (low + high) / 2;
1384         if (rp[t] > bcol) high = t;
1385         else low = t;
1386       }
1387       for (i = low; i < high; i++) {
1388         if (rp[i] > bcol) break;
1389         if (rp[i] == bcol) {
1390           bap = ap + 16 * i + 4 * cidx + ridx;
1391           *bap += value;
1392           goto noinsert1;
1393         }
1394       }
1395       N = nrow++ - 1;
1396       high++; /* added new column thus must search to one higher than before */
1397       /* shift up all the later entries in this row */
1398       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
1399       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
1400       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1401       rp[i]                        = bcol;
1402       ap[16 * i + 4 * cidx + ridx] = value;
1403     noinsert1:;
1404       low = i;
1405     }
1406     ailen[brow] = nrow;
1407   }
1408   PetscFunctionReturnVoid();
1409 }
1410 
1411 /*
1412      Checks for missing diagonals
1413 */
1414 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1415 {
1416   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1417   PetscInt    *diag, *ii = a->i, i;
1418 
1419   PetscFunctionBegin;
1420   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
1421   *missing = PETSC_FALSE;
1422   if (A->rmap->n > 0 && !ii) {
1423     *missing = PETSC_TRUE;
1424     if (d) *d = 0;
1425     PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
1426   } else {
1427     PetscInt n;
1428     n    = PetscMin(a->mbs, a->nbs);
1429     diag = a->diag;
1430     for (i = 0; i < n; i++) {
1431       if (diag[i] >= ii[i + 1]) {
1432         *missing = PETSC_TRUE;
1433         if (d) *d = i;
1434         PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i));
1435         break;
1436       }
1437     }
1438   }
1439   PetscFunctionReturn(PETSC_SUCCESS);
1440 }
1441 
1442 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1443 {
1444   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1445   PetscInt     i, j, m = a->mbs;
1446 
1447   PetscFunctionBegin;
1448   if (!a->diag) {
1449     PetscCall(PetscMalloc1(m, &a->diag));
1450     a->free_diag = PETSC_TRUE;
1451   }
1452   for (i = 0; i < m; i++) {
1453     a->diag[i] = a->i[i + 1];
1454     for (j = a->i[i]; j < a->i[i + 1]; j++) {
1455       if (a->j[j] == i) {
1456         a->diag[i] = j;
1457         break;
1458       }
1459     }
1460   }
1461   PetscFunctionReturn(PETSC_SUCCESS);
1462 }
1463 
1464 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1465 {
1466   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1467   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
1468   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
1469 
1470   PetscFunctionBegin;
1471   *nn = n;
1472   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1473   if (symmetric) {
1474     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1475     nz = tia[n];
1476   } else {
1477     tia = a->i;
1478     tja = a->j;
1479   }
1480 
1481   if (!blockcompressed && bs > 1) {
1482     (*nn) *= bs;
1483     /* malloc & create the natural set of indices */
1484     PetscCall(PetscMalloc1((n + 1) * bs, ia));
1485     if (n) {
1486       (*ia)[0] = oshift;
1487       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
1488     }
1489 
1490     for (i = 1; i < n; i++) {
1491       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1492       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
1493     }
1494     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1495 
1496     if (inja) {
1497       PetscCall(PetscMalloc1(nz * bs * bs, ja));
1498       cnt = 0;
1499       for (i = 0; i < n; i++) {
1500         for (j = 0; j < bs; j++) {
1501           for (k = tia[i]; k < tia[i + 1]; k++) {
1502             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
1503           }
1504         }
1505       }
1506     }
1507 
1508     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1509       PetscCall(PetscFree(tia));
1510       PetscCall(PetscFree(tja));
1511     }
1512   } else if (oshift == 1) {
1513     if (symmetric) {
1514       nz = tia[A->rmap->n / bs];
1515       /*  add 1 to i and j indices */
1516       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1517       *ia = tia;
1518       if (ja) {
1519         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1520         *ja = tja;
1521       }
1522     } else {
1523       nz = a->i[A->rmap->n / bs];
1524       /* malloc space and  add 1 to i and j indices */
1525       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1526       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1527       if (ja) {
1528         PetscCall(PetscMalloc1(nz, ja));
1529         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1530       }
1531     }
1532   } else {
1533     *ia = tia;
1534     if (ja) *ja = tja;
1535   }
1536   PetscFunctionReturn(PETSC_SUCCESS);
1537 }
1538 
1539 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1540 {
1541   PetscFunctionBegin;
1542   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1543   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1544     PetscCall(PetscFree(*ia));
1545     if (ja) PetscCall(PetscFree(*ja));
1546   }
1547   PetscFunctionReturn(PETSC_SUCCESS);
1548 }
1549 
1550 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1551 {
1552   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1553 
1554   PetscFunctionBegin;
1555   if (A->hash_active) {
1556     PetscInt bs;
1557     A->ops[0] = a->cops;
1558     PetscCall(PetscHMapIJVDestroy(&a->ht));
1559     PetscCall(MatGetBlockSize(A, &bs));
1560     if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht));
1561     PetscCall(PetscFree(a->dnz));
1562     PetscCall(PetscFree(a->bdnz));
1563     A->hash_active = PETSC_FALSE;
1564   }
1565   PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
1566   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
1567   PetscCall(ISDestroy(&a->row));
1568   PetscCall(ISDestroy(&a->col));
1569   if (a->free_diag) PetscCall(PetscFree(a->diag));
1570   PetscCall(PetscFree(a->idiag));
1571   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
1572   PetscCall(PetscFree(a->solve_work));
1573   PetscCall(PetscFree(a->mult_work));
1574   PetscCall(PetscFree(a->sor_workt));
1575   PetscCall(PetscFree(a->sor_work));
1576   PetscCall(ISDestroy(&a->icol));
1577   PetscCall(PetscFree(a->saved_values));
1578   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1579 
1580   PetscCall(MatDestroy(&a->sbaijMat));
1581   PetscCall(MatDestroy(&a->parent));
1582   PetscCall(PetscFree(A->data));
1583 
1584   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
1585   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
1586   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
1587   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
1588   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
1589   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
1590   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
1591   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
1592   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
1593   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
1594   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
1595   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
1596 #if defined(PETSC_HAVE_HYPRE)
1597   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
1598 #endif
1599   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
1600   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1601   PetscFunctionReturn(PETSC_SUCCESS);
1602 }
1603 
1604 static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1605 {
1606   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1607 
1608   PetscFunctionBegin;
1609   switch (op) {
1610   case MAT_ROW_ORIENTED:
1611     a->roworiented = flg;
1612     break;
1613   case MAT_KEEP_NONZERO_PATTERN:
1614     a->keepnonzeropattern = flg;
1615     break;
1616   case MAT_NEW_NONZERO_LOCATIONS:
1617     a->nonew = (flg ? 0 : 1);
1618     break;
1619   case MAT_NEW_NONZERO_LOCATION_ERR:
1620     a->nonew = (flg ? -1 : 0);
1621     break;
1622   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1623     a->nonew = (flg ? -2 : 0);
1624     break;
1625   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1626     a->nounused = (flg ? -1 : 0);
1627     break;
1628   default:
1629     break;
1630   }
1631   PetscFunctionReturn(PETSC_SUCCESS);
1632 }
1633 
1634 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1635 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1636 {
1637   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
1638   MatScalar   *aa_i;
1639   PetscScalar *v_i;
1640 
1641   PetscFunctionBegin;
1642   bs  = A->rmap->bs;
1643   bs2 = bs * bs;
1644   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1645 
1646   bn  = row / bs; /* Block number */
1647   bp  = row % bs; /* Block Position */
1648   M   = ai[bn + 1] - ai[bn];
1649   *nz = bs * M;
1650 
1651   if (v) {
1652     *v = NULL;
1653     if (*nz) {
1654       PetscCall(PetscMalloc1(*nz, v));
1655       for (i = 0; i < M; i++) { /* for each block in the block row */
1656         v_i  = *v + i * bs;
1657         aa_i = aa + bs2 * (ai[bn] + i);
1658         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
1659       }
1660     }
1661   }
1662 
1663   if (idx) {
1664     *idx = NULL;
1665     if (*nz) {
1666       PetscCall(PetscMalloc1(*nz, idx));
1667       for (i = 0; i < M; i++) { /* for each block in the block row */
1668         idx_i = *idx + i * bs;
1669         itmp  = bs * aj[ai[bn] + i];
1670         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
1671       }
1672     }
1673   }
1674   PetscFunctionReturn(PETSC_SUCCESS);
1675 }
1676 
1677 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1678 {
1679   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1680 
1681   PetscFunctionBegin;
1682   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
1683   PetscFunctionReturn(PETSC_SUCCESS);
1684 }
1685 
1686 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1687 {
1688   PetscFunctionBegin;
1689   if (idx) PetscCall(PetscFree(*idx));
1690   if (v) PetscCall(PetscFree(*v));
1691   PetscFunctionReturn(PETSC_SUCCESS);
1692 }
1693 
1694 static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1695 {
1696   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
1697   Mat          C;
1698   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
1699   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
1700   MatScalar   *ata, *aa = a->a;
1701 
1702   PetscFunctionBegin;
1703   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1704   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1705   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1706     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1707 
1708     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
1709     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
1710     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
1711     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
1712 
1713     at  = (Mat_SeqBAIJ *)C->data;
1714     ati = at->i;
1715     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1716   } else {
1717     C   = *B;
1718     at  = (Mat_SeqBAIJ *)C->data;
1719     ati = at->i;
1720   }
1721 
1722   atj = at->j;
1723   ata = at->a;
1724 
1725   /* Copy ati into atfill so we have locations of the next free space in atj */
1726   PetscCall(PetscArraycpy(atfill, ati, nbs));
1727 
1728   /* Walk through A row-wise and mark nonzero entries of A^T. */
1729   for (i = 0; i < mbs; i++) {
1730     anzj = ai[i + 1] - ai[i];
1731     for (j = 0; j < anzj; j++) {
1732       atj[atfill[*aj]] = i;
1733       for (kr = 0; kr < bs; kr++) {
1734         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
1735       }
1736       atfill[*aj++] += 1;
1737     }
1738   }
1739   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1740   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1741 
1742   /* Clean up temporary space and complete requests. */
1743   PetscCall(PetscFree(atfill));
1744 
1745   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1746     PetscCall(MatSetBlockSizes(C, A->cmap->bs, A->rmap->bs));
1747     *B = C;
1748   } else {
1749     PetscCall(MatHeaderMerge(A, &C));
1750   }
1751   PetscFunctionReturn(PETSC_SUCCESS);
1752 }
1753 
1754 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1755 {
1756   Mat Btrans;
1757 
1758   PetscFunctionBegin;
1759   *f = PETSC_FALSE;
1760   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
1761   PetscCall(MatEqual_SeqBAIJ(B, Btrans, f));
1762   PetscCall(MatDestroy(&Btrans));
1763   PetscFunctionReturn(PETSC_SUCCESS);
1764 }
1765 
1766 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1767 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1768 {
1769   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1770   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1771   PetscInt    *rowlens, *colidxs;
1772   PetscScalar *matvals;
1773 
1774   PetscFunctionBegin;
1775   PetscCall(PetscViewerSetUp(viewer));
1776 
1777   M  = mat->rmap->N;
1778   N  = mat->cmap->N;
1779   m  = mat->rmap->n;
1780   bs = mat->rmap->bs;
1781   nz = bs * bs * A->nz;
1782 
1783   /* write matrix header */
1784   header[0] = MAT_FILE_CLASSID;
1785   header[1] = M;
1786   header[2] = N;
1787   header[3] = nz;
1788   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1789 
1790   /* store row lengths */
1791   PetscCall(PetscMalloc1(m, &rowlens));
1792   for (cnt = 0, i = 0; i < A->mbs; i++)
1793     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
1794   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
1795   PetscCall(PetscFree(rowlens));
1796 
1797   /* store column indices  */
1798   PetscCall(PetscMalloc1(nz, &colidxs));
1799   for (cnt = 0, i = 0; i < A->mbs; i++)
1800     for (k = 0; k < bs; k++)
1801       for (j = A->i[i]; j < A->i[i + 1]; j++)
1802         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
1803   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1804   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
1805   PetscCall(PetscFree(colidxs));
1806 
1807   /* store nonzero values */
1808   PetscCall(PetscMalloc1(nz, &matvals));
1809   for (cnt = 0, i = 0; i < A->mbs; i++)
1810     for (k = 0; k < bs; k++)
1811       for (j = A->i[i]; j < A->i[i + 1]; j++)
1812         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
1813   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1814   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
1815   PetscCall(PetscFree(matvals));
1816 
1817   /* write block size option to the viewer's .info file */
1818   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1819   PetscFunctionReturn(PETSC_SUCCESS);
1820 }
1821 
1822 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1823 {
1824   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1825   PetscInt     i, bs = A->rmap->bs, k;
1826 
1827   PetscFunctionBegin;
1828   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1829   for (i = 0; i < a->mbs; i++) {
1830     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
1831     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
1832     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1833   }
1834   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1835   PetscFunctionReturn(PETSC_SUCCESS);
1836 }
1837 
1838 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1839 {
1840   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1841   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1842   PetscViewerFormat format;
1843 
1844   PetscFunctionBegin;
1845   if (A->structure_only) {
1846     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
1847     PetscFunctionReturn(PETSC_SUCCESS);
1848   }
1849 
1850   PetscCall(PetscViewerGetFormat(viewer, &format));
1851   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1852     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1853   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1854     const char *matname;
1855     Mat         aij;
1856     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
1857     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
1858     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
1859     PetscCall(MatView(aij, viewer));
1860     PetscCall(MatDestroy(&aij));
1861   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1862     PetscFunctionReturn(PETSC_SUCCESS);
1863   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1864     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1865     for (i = 0; i < a->mbs; i++) {
1866       for (j = 0; j < bs; j++) {
1867         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1868         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1869           for (l = 0; l < bs; l++) {
1870 #if defined(PETSC_USE_COMPLEX)
1871             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1872               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1873             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1874               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1875             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1876               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1877             }
1878 #else
1879             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1880 #endif
1881           }
1882         }
1883         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1884       }
1885     }
1886     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1887   } else {
1888     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1889     for (i = 0; i < a->mbs; i++) {
1890       for (j = 0; j < bs; j++) {
1891         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1892         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1893           for (l = 0; l < bs; l++) {
1894 #if defined(PETSC_USE_COMPLEX)
1895             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
1896               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1897             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
1898               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1899             } else {
1900               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1901             }
1902 #else
1903             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1904 #endif
1905           }
1906         }
1907         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1908       }
1909     }
1910     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1911   }
1912   PetscCall(PetscViewerFlush(viewer));
1913   PetscFunctionReturn(PETSC_SUCCESS);
1914 }
1915 
1916 #include <petscdraw.h>
1917 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1918 {
1919   Mat               A = (Mat)Aa;
1920   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1921   PetscInt          row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
1922   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1923   MatScalar        *aa;
1924   PetscViewer       viewer;
1925   PetscViewerFormat format;
1926   int               color;
1927 
1928   PetscFunctionBegin;
1929   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1930   PetscCall(PetscViewerGetFormat(viewer, &format));
1931   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
1932 
1933   /* loop over matrix elements drawing boxes */
1934 
1935   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1936     PetscDrawCollectiveBegin(draw);
1937     /* Blue for negative, Cyan for zero and  Red for positive */
1938     color = PETSC_DRAW_BLUE;
1939     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1940       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1941         y_l = A->rmap->N - row - 1.0;
1942         y_r = y_l + 1.0;
1943         x_l = a->j[j] * bs;
1944         x_r = x_l + 1.0;
1945         aa  = a->a + j * bs2;
1946         for (k = 0; k < bs; k++) {
1947           for (l = 0; l < bs; l++) {
1948             if (PetscRealPart(*aa++) >= 0.) continue;
1949             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1950           }
1951         }
1952       }
1953     }
1954     color = PETSC_DRAW_CYAN;
1955     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1956       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1957         y_l = A->rmap->N - row - 1.0;
1958         y_r = y_l + 1.0;
1959         x_l = a->j[j] * bs;
1960         x_r = x_l + 1.0;
1961         aa  = a->a + j * bs2;
1962         for (k = 0; k < bs; k++) {
1963           for (l = 0; l < bs; l++) {
1964             if (PetscRealPart(*aa++) != 0.) continue;
1965             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1966           }
1967         }
1968       }
1969     }
1970     color = PETSC_DRAW_RED;
1971     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1972       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1973         y_l = A->rmap->N - row - 1.0;
1974         y_r = y_l + 1.0;
1975         x_l = a->j[j] * bs;
1976         x_r = x_l + 1.0;
1977         aa  = a->a + j * bs2;
1978         for (k = 0; k < bs; k++) {
1979           for (l = 0; l < bs; l++) {
1980             if (PetscRealPart(*aa++) <= 0.) continue;
1981             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1982           }
1983         }
1984       }
1985     }
1986     PetscDrawCollectiveEnd(draw);
1987   } else {
1988     /* use contour shading to indicate magnitude of values */
1989     /* first determine max of all nonzero values */
1990     PetscReal minv = 0.0, maxv = 0.0;
1991     PetscDraw popup;
1992 
1993     for (i = 0; i < a->nz * a->bs2; i++) {
1994       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1995     }
1996     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1997     PetscCall(PetscDrawGetPopup(draw, &popup));
1998     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
1999 
2000     PetscDrawCollectiveBegin(draw);
2001     for (i = 0, row = 0; i < mbs; i++, row += bs) {
2002       for (j = a->i[i]; j < a->i[i + 1]; j++) {
2003         y_l = A->rmap->N - row - 1.0;
2004         y_r = y_l + 1.0;
2005         x_l = a->j[j] * bs;
2006         x_r = x_l + 1.0;
2007         aa  = a->a + j * bs2;
2008         for (k = 0; k < bs; k++) {
2009           for (l = 0; l < bs; l++) {
2010             MatScalar v = *aa++;
2011             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
2012             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2013           }
2014         }
2015       }
2016     }
2017     PetscDrawCollectiveEnd(draw);
2018   }
2019   PetscFunctionReturn(PETSC_SUCCESS);
2020 }
2021 
2022 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2023 {
2024   PetscReal xl, yl, xr, yr, w, h;
2025   PetscDraw draw;
2026   PetscBool isnull;
2027 
2028   PetscFunctionBegin;
2029   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
2030   PetscCall(PetscDrawIsNull(draw, &isnull));
2031   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
2032 
2033   xr = A->cmap->n;
2034   yr = A->rmap->N;
2035   h  = yr / 10.0;
2036   w  = xr / 10.0;
2037   xr += w;
2038   yr += h;
2039   xl = -w;
2040   yl = -h;
2041   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
2042   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
2043   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
2044   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
2045   PetscCall(PetscDrawSave(draw));
2046   PetscFunctionReturn(PETSC_SUCCESS);
2047 }
2048 
2049 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2050 {
2051   PetscBool isascii, isbinary, isdraw;
2052 
2053   PetscFunctionBegin;
2054   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
2055   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2056   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
2057   if (isascii) {
2058     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
2059   } else if (isbinary) {
2060     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
2061   } else if (isdraw) {
2062     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
2063   } else {
2064     Mat B;
2065     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2066     PetscCall(MatView(B, viewer));
2067     PetscCall(MatDestroy(&B));
2068   }
2069   PetscFunctionReturn(PETSC_SUCCESS);
2070 }
2071 
2072 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2073 {
2074   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2075   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2076   PetscInt    *ai = a->i, *ailen = a->ilen;
2077   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
2078   MatScalar   *ap, *aa = a->a;
2079 
2080   PetscFunctionBegin;
2081   for (k = 0; k < m; k++) { /* loop over rows */
2082     row  = im[k];
2083     brow = row / bs;
2084     if (row < 0) {
2085       v += n;
2086       continue;
2087     } /* negative row */
2088     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2089     rp   = PetscSafePointerPlusOffset(aj, ai[brow]);
2090     ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2091     nrow = ailen[brow];
2092     for (l = 0; l < n; l++) { /* loop over columns */
2093       if (in[l] < 0) {
2094         v++;
2095         continue;
2096       } /* negative column */
2097       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
2098       col  = in[l];
2099       bcol = col / bs;
2100       cidx = col % bs;
2101       ridx = row % bs;
2102       high = nrow;
2103       low  = 0; /* assume unsorted */
2104       while (high - low > 5) {
2105         t = (low + high) / 2;
2106         if (rp[t] > bcol) high = t;
2107         else low = t;
2108       }
2109       for (i = low; i < high; i++) {
2110         if (rp[i] > bcol) break;
2111         if (rp[i] == bcol) {
2112           *v++ = ap[bs2 * i + bs * cidx + ridx];
2113           goto finished;
2114         }
2115       }
2116       *v++ = 0.0;
2117     finished:;
2118     }
2119   }
2120   PetscFunctionReturn(PETSC_SUCCESS);
2121 }
2122 
2123 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2124 {
2125   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2126   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2127   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2128   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2129   PetscBool          roworiented = a->roworiented;
2130   const PetscScalar *value       = v;
2131   MatScalar         *ap = NULL, *aa = a->a, *bap;
2132 
2133   PetscFunctionBegin;
2134   if (roworiented) {
2135     stepval = (n - 1) * bs;
2136   } else {
2137     stepval = (m - 1) * bs;
2138   }
2139   for (k = 0; k < m; k++) { /* loop over added rows */
2140     row = im[k];
2141     if (row < 0) continue;
2142     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
2143     rp = aj + ai[row];
2144     if (!A->structure_only) ap = aa + bs2 * ai[row];
2145     rmax = imax[row];
2146     nrow = ailen[row];
2147     low  = 0;
2148     high = nrow;
2149     for (l = 0; l < n; l++) { /* loop over added columns */
2150       if (in[l] < 0) continue;
2151       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
2152       col = in[l];
2153       if (!A->structure_only) {
2154         if (roworiented) {
2155           value = v + (k * (stepval + bs) + l) * bs;
2156         } else {
2157           value = v + (l * (stepval + bs) + k) * bs;
2158         }
2159       }
2160       if (col <= lastcol) low = 0;
2161       else high = nrow;
2162       lastcol = col;
2163       while (high - low > 7) {
2164         t = (low + high) / 2;
2165         if (rp[t] > col) high = t;
2166         else low = t;
2167       }
2168       for (i = low; i < high; i++) {
2169         if (rp[i] > col) break;
2170         if (rp[i] == col) {
2171           if (A->structure_only) goto noinsert2;
2172           bap = ap + bs2 * i;
2173           if (roworiented) {
2174             if (is == ADD_VALUES) {
2175               for (ii = 0; ii < bs; ii++, value += stepval) {
2176                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2177               }
2178             } else {
2179               for (ii = 0; ii < bs; ii++, value += stepval) {
2180                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2181               }
2182             }
2183           } else {
2184             if (is == ADD_VALUES) {
2185               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2186                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
2187                 bap += bs;
2188               }
2189             } else {
2190               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2191                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
2192                 bap += bs;
2193               }
2194             }
2195           }
2196           goto noinsert2;
2197         }
2198       }
2199       if (nonew == 1) goto noinsert2;
2200       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2201       if (A->structure_only) {
2202         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
2203       } else {
2204         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2205       }
2206       N = nrow++ - 1;
2207       high++;
2208       /* shift up all the later entries in this row */
2209       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2210       rp[i] = col;
2211       if (!A->structure_only) {
2212         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2213         bap = ap + bs2 * i;
2214         if (roworiented) {
2215           for (ii = 0; ii < bs; ii++, value += stepval) {
2216             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2217           }
2218         } else {
2219           for (ii = 0; ii < bs; ii++, value += stepval) {
2220             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2221           }
2222         }
2223       }
2224     noinsert2:;
2225       low = i;
2226     }
2227     ailen[row] = nrow;
2228   }
2229   PetscFunctionReturn(PETSC_SUCCESS);
2230 }
2231 
2232 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2233 {
2234   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2235   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2236   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2237   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2238   MatScalar   *aa    = a->a, *ap;
2239   PetscReal    ratio = 0.6;
2240 
2241   PetscFunctionBegin;
2242   if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS);
2243 
2244   if (m) rmax = ailen[0];
2245   for (i = 1; i < mbs; i++) {
2246     /* move each row back by the amount of empty slots (fshift) before it*/
2247     fshift += imax[i - 1] - ailen[i - 1];
2248     rmax = PetscMax(rmax, ailen[i]);
2249     if (fshift) {
2250       ip = aj + ai[i];
2251       ap = aa + bs2 * ai[i];
2252       N  = ailen[i];
2253       PetscCall(PetscArraymove(ip - fshift, ip, N));
2254       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2255     }
2256     ai[i] = ai[i - 1] + ailen[i - 1];
2257   }
2258   if (mbs) {
2259     fshift += imax[mbs - 1] - ailen[mbs - 1];
2260     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2261   }
2262 
2263   /* reset ilen and imax for each row */
2264   a->nonzerorowcnt = 0;
2265   if (A->structure_only) {
2266     PetscCall(PetscFree2(a->imax, a->ilen));
2267   } else { /* !A->structure_only */
2268     for (i = 0; i < mbs; i++) {
2269       ailen[i] = imax[i] = ai[i + 1] - ai[i];
2270       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2271     }
2272   }
2273   a->nz = ai[mbs];
2274 
2275   /* diagonals may have moved, so kill the diagonal pointers */
2276   a->idiagvalid = PETSC_FALSE;
2277   if (fshift && a->diag) PetscCall(PetscFree(a->diag));
2278   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
2279   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
2280   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
2281   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
2282 
2283   A->info.mallocs += a->reallocs;
2284   a->reallocs         = 0;
2285   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2286   a->rmax             = rmax;
2287 
2288   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
2289   PetscFunctionReturn(PETSC_SUCCESS);
2290 }
2291 
2292 /*
2293    This function returns an array of flags which indicate the locations of contiguous
2294    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2295    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2296    Assume: sizes should be long enough to hold all the values.
2297 */
2298 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2299 {
2300   PetscInt j = 0;
2301 
2302   PetscFunctionBegin;
2303   for (PetscInt i = 0; i < n; j++) {
2304     PetscInt row = idx[i];
2305     if (row % bs != 0) { /* Not the beginning of a block */
2306       sizes[j] = 1;
2307       i++;
2308     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2309       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2310       i++;
2311     } else { /* Beginning of the block, so check if the complete block exists */
2312       PetscBool flg = PETSC_TRUE;
2313       for (PetscInt k = 1; k < bs; k++) {
2314         if (row + k != idx[i + k]) { /* break in the block */
2315           flg = PETSC_FALSE;
2316           break;
2317         }
2318       }
2319       if (flg) { /* No break in the bs */
2320         sizes[j] = bs;
2321         i += bs;
2322       } else {
2323         sizes[j] = 1;
2324         i++;
2325       }
2326     }
2327   }
2328   *bs_max = j;
2329   PetscFunctionReturn(PETSC_SUCCESS);
2330 }
2331 
2332 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2333 {
2334   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2335   PetscInt           i, j, k, count, *rows;
2336   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
2337   PetscScalar        zero = 0.0;
2338   MatScalar         *aa;
2339   const PetscScalar *xx;
2340   PetscScalar       *bb;
2341 
2342   PetscFunctionBegin;
2343   /* fix right-hand side if needed */
2344   if (x && b) {
2345     PetscCall(VecGetArrayRead(x, &xx));
2346     PetscCall(VecGetArray(b, &bb));
2347     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
2348     PetscCall(VecRestoreArrayRead(x, &xx));
2349     PetscCall(VecRestoreArray(b, &bb));
2350   }
2351 
2352   /* Make a copy of the IS and  sort it */
2353   /* allocate memory for rows,sizes */
2354   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2355 
2356   /* copy IS values to rows, and sort them */
2357   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
2358   PetscCall(PetscSortInt(is_n, rows));
2359 
2360   if (baij->keepnonzeropattern) {
2361     for (i = 0; i < is_n; i++) sizes[i] = 1;
2362     bs_max = is_n;
2363   } else {
2364     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2365     A->nonzerostate++;
2366   }
2367 
2368   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2369     row = rows[j];
2370     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2371     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2372     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2373     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2374       if (diag != (PetscScalar)0.0) {
2375         if (baij->ilen[row / bs] > 0) {
2376           baij->ilen[row / bs]       = 1;
2377           baij->j[baij->i[row / bs]] = row / bs;
2378 
2379           PetscCall(PetscArrayzero(aa, count * bs));
2380         }
2381         /* Now insert all the diagonal values for this bs */
2382         for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES);
2383       } else { /* (diag == 0.0) */
2384         baij->ilen[row / bs] = 0;
2385       } /* end (diag == 0.0) */
2386     } else { /* (sizes[i] != bs) */
2387       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2388       for (k = 0; k < count; k++) {
2389         aa[0] = zero;
2390         aa += bs;
2391       }
2392       if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES);
2393     }
2394   }
2395 
2396   PetscCall(PetscFree2(rows, sizes));
2397   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2398   PetscFunctionReturn(PETSC_SUCCESS);
2399 }
2400 
2401 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2402 {
2403   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2404   PetscInt           i, j, k, count;
2405   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
2406   PetscScalar        zero = 0.0;
2407   MatScalar         *aa;
2408   const PetscScalar *xx;
2409   PetscScalar       *bb;
2410   PetscBool         *zeroed, vecs = PETSC_FALSE;
2411 
2412   PetscFunctionBegin;
2413   /* fix right-hand side if needed */
2414   if (x && b) {
2415     PetscCall(VecGetArrayRead(x, &xx));
2416     PetscCall(VecGetArray(b, &bb));
2417     vecs = PETSC_TRUE;
2418   }
2419 
2420   /* zero the columns */
2421   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
2422   for (i = 0; i < is_n; i++) {
2423     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
2424     zeroed[is_idx[i]] = PETSC_TRUE;
2425   }
2426   for (i = 0; i < A->rmap->N; i++) {
2427     if (!zeroed[i]) {
2428       row = i / bs;
2429       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
2430         for (k = 0; k < bs; k++) {
2431           col = bs * baij->j[j] + k;
2432           if (zeroed[col]) {
2433             aa = baij->a + j * bs2 + (i % bs) + bs * k;
2434             if (vecs) bb[i] -= aa[0] * xx[col];
2435             aa[0] = 0.0;
2436           }
2437         }
2438       }
2439     } else if (vecs) bb[i] = diag * xx[i];
2440   }
2441   PetscCall(PetscFree(zeroed));
2442   if (vecs) {
2443     PetscCall(VecRestoreArrayRead(x, &xx));
2444     PetscCall(VecRestoreArray(b, &bb));
2445   }
2446 
2447   /* zero the rows */
2448   for (i = 0; i < is_n; i++) {
2449     row   = is_idx[i];
2450     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2451     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2452     for (k = 0; k < count; k++) {
2453       aa[0] = zero;
2454       aa += bs;
2455     }
2456     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
2457   }
2458   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2459   PetscFunctionReturn(PETSC_SUCCESS);
2460 }
2461 
2462 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2463 {
2464   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2465   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2466   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2467   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2468   PetscInt     ridx, cidx, bs2                 = a->bs2;
2469   PetscBool    roworiented = a->roworiented;
2470   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
2471 
2472   PetscFunctionBegin;
2473   for (k = 0; k < m; k++) { /* loop over added rows */
2474     row  = im[k];
2475     brow = row / bs;
2476     if (row < 0) continue;
2477     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
2478     rp = PetscSafePointerPlusOffset(aj, ai[brow]);
2479     if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2480     rmax = imax[brow];
2481     nrow = ailen[brow];
2482     low  = 0;
2483     high = nrow;
2484     for (l = 0; l < n; l++) { /* loop over added columns */
2485       if (in[l] < 0) continue;
2486       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
2487       col  = in[l];
2488       bcol = col / bs;
2489       ridx = row % bs;
2490       cidx = col % bs;
2491       if (!A->structure_only) {
2492         if (roworiented) {
2493           value = v[l + k * n];
2494         } else {
2495           value = v[k + l * m];
2496         }
2497       }
2498       if (col <= lastcol) low = 0;
2499       else high = nrow;
2500       lastcol = col;
2501       while (high - low > 7) {
2502         t = (low + high) / 2;
2503         if (rp[t] > bcol) high = t;
2504         else low = t;
2505       }
2506       for (i = low; i < high; i++) {
2507         if (rp[i] > bcol) break;
2508         if (rp[i] == bcol) {
2509           bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx);
2510           if (!A->structure_only) {
2511             if (is == ADD_VALUES) *bap += value;
2512             else *bap = value;
2513           }
2514           goto noinsert1;
2515         }
2516       }
2517       if (nonew == 1) goto noinsert1;
2518       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2519       if (A->structure_only) {
2520         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2521       } else {
2522         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2523       }
2524       N = nrow++ - 1;
2525       high++;
2526       /* shift up all the later entries in this row */
2527       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2528       rp[i] = bcol;
2529       if (!A->structure_only) {
2530         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2531         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2532         ap[bs2 * i + bs * cidx + ridx] = value;
2533       }
2534       a->nz++;
2535     noinsert1:;
2536       low = i;
2537     }
2538     ailen[brow] = nrow;
2539   }
2540   PetscFunctionReturn(PETSC_SUCCESS);
2541 }
2542 
2543 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2544 {
2545   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
2546   Mat          outA;
2547   PetscBool    row_identity, col_identity;
2548 
2549   PetscFunctionBegin;
2550   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
2551   PetscCall(ISIdentity(row, &row_identity));
2552   PetscCall(ISIdentity(col, &col_identity));
2553   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
2554 
2555   outA            = inA;
2556   inA->factortype = MAT_FACTOR_LU;
2557   PetscCall(PetscFree(inA->solvertype));
2558   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
2559 
2560   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2561 
2562   PetscCall(PetscObjectReference((PetscObject)row));
2563   PetscCall(ISDestroy(&a->row));
2564   a->row = row;
2565   PetscCall(PetscObjectReference((PetscObject)col));
2566   PetscCall(ISDestroy(&a->col));
2567   a->col = col;
2568 
2569   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2570   PetscCall(ISDestroy(&a->icol));
2571   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2572 
2573   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2574   if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
2575   PetscCall(MatLUFactorNumeric(outA, inA, info));
2576   PetscFunctionReturn(PETSC_SUCCESS);
2577 }
2578 
2579 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2580 {
2581   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2582 
2583   PetscFunctionBegin;
2584   baij->nz = baij->maxnz;
2585   PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2586   PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
2587   PetscFunctionReturn(PETSC_SUCCESS);
2588 }
2589 
2590 /*@
2591   MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix.
2592 
2593   Input Parameters:
2594 + mat     - the `MATSEQBAIJ` matrix
2595 - indices - the block column indices
2596 
2597   Level: advanced
2598 
2599   Notes:
2600   This can be called if you have precomputed the nonzero structure of the
2601   matrix and want to provide it to the matrix object to improve the performance
2602   of the `MatSetValues()` operation.
2603 
2604   You MUST have set the correct numbers of nonzeros per row in the call to
2605   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
2606 
2607   MUST be called before any calls to `MatSetValues()`
2608 
2609 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()`
2610 @*/
2611 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2612 {
2613   PetscFunctionBegin;
2614   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2615   PetscAssertPointer(indices, 2);
2616   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices));
2617   PetscFunctionReturn(PETSC_SUCCESS);
2618 }
2619 
2620 static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2621 {
2622   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2623   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2624   PetscReal    atmp;
2625   PetscScalar *x, zero = 0.0;
2626   MatScalar   *aa;
2627   PetscInt     ncols, brow, krow, kcol;
2628 
2629   PetscFunctionBegin;
2630   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2631   bs  = A->rmap->bs;
2632   aa  = a->a;
2633   ai  = a->i;
2634   aj  = a->j;
2635   mbs = a->mbs;
2636 
2637   PetscCall(VecSet(v, zero));
2638   PetscCall(VecGetArray(v, &x));
2639   PetscCall(VecGetLocalSize(v, &n));
2640   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2641   for (i = 0; i < mbs; i++) {
2642     ncols = ai[1] - ai[0];
2643     ai++;
2644     brow = bs * i;
2645     for (j = 0; j < ncols; j++) {
2646       for (kcol = 0; kcol < bs; kcol++) {
2647         for (krow = 0; krow < bs; krow++) {
2648           atmp = PetscAbsScalar(*aa);
2649           aa++;
2650           row = brow + krow; /* row index */
2651           if (PetscAbsScalar(x[row]) < atmp) {
2652             x[row] = atmp;
2653             if (idx) idx[row] = bs * (*aj) + kcol;
2654           }
2655         }
2656       }
2657       aj++;
2658     }
2659   }
2660   PetscCall(VecRestoreArray(v, &x));
2661   PetscFunctionReturn(PETSC_SUCCESS);
2662 }
2663 
2664 static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v)
2665 {
2666   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2667   PetscInt     i, j, n, row, bs, *ai, mbs;
2668   PetscReal    atmp;
2669   PetscScalar *x, zero = 0.0;
2670   MatScalar   *aa;
2671   PetscInt     ncols, brow, krow, kcol;
2672 
2673   PetscFunctionBegin;
2674   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2675   bs  = A->rmap->bs;
2676   aa  = a->a;
2677   ai  = a->i;
2678   mbs = a->mbs;
2679 
2680   PetscCall(VecSet(v, zero));
2681   PetscCall(VecGetArrayWrite(v, &x));
2682   PetscCall(VecGetLocalSize(v, &n));
2683   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2684   for (i = 0; i < mbs; i++) {
2685     ncols = ai[1] - ai[0];
2686     ai++;
2687     brow = bs * i;
2688     for (j = 0; j < ncols; j++) {
2689       for (kcol = 0; kcol < bs; kcol++) {
2690         for (krow = 0; krow < bs; krow++) {
2691           atmp = PetscAbsScalar(*aa);
2692           aa++;
2693           row = brow + krow; /* row index */
2694           x[row] += atmp;
2695         }
2696       }
2697     }
2698   }
2699   PetscCall(VecRestoreArrayWrite(v, &x));
2700   PetscFunctionReturn(PETSC_SUCCESS);
2701 }
2702 
2703 static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2704 {
2705   PetscFunctionBegin;
2706   /* If the two matrices have the same copy implementation, use fast copy. */
2707   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2708     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
2709     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2710     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
2711 
2712     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
2713     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
2714     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
2715     PetscCall(PetscObjectStateIncrease((PetscObject)B));
2716   } else {
2717     PetscCall(MatCopy_Basic(A, B, str));
2718   }
2719   PetscFunctionReturn(PETSC_SUCCESS);
2720 }
2721 
2722 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2723 {
2724   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2725 
2726   PetscFunctionBegin;
2727   *array = a->a;
2728   PetscFunctionReturn(PETSC_SUCCESS);
2729 }
2730 
2731 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2732 {
2733   PetscFunctionBegin;
2734   *array = NULL;
2735   PetscFunctionReturn(PETSC_SUCCESS);
2736 }
2737 
2738 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2739 {
2740   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
2741   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
2742   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
2743 
2744   PetscFunctionBegin;
2745   /* Set the number of nonzeros in the new matrix */
2746   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
2747   PetscFunctionReturn(PETSC_SUCCESS);
2748 }
2749 
2750 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2751 {
2752   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
2753   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2754   PetscBLASInt one = 1;
2755 
2756   PetscFunctionBegin;
2757   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2758     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2759     if (e) {
2760       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2761       if (e) {
2762         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2763         if (e) str = SAME_NONZERO_PATTERN;
2764       }
2765     }
2766     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2767   }
2768   if (str == SAME_NONZERO_PATTERN) {
2769     PetscScalar  alpha = a;
2770     PetscBLASInt bnz;
2771     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2772     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
2773     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2774   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2775     PetscCall(MatAXPY_Basic(Y, a, X, str));
2776   } else {
2777     Mat       B;
2778     PetscInt *nnz;
2779     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
2780     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
2781     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2782     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2783     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
2784     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
2785     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
2786     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
2787     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
2788     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2789     PetscCall(MatHeaderMerge(Y, &B));
2790     PetscCall(PetscFree(nnz));
2791   }
2792   PetscFunctionReturn(PETSC_SUCCESS);
2793 }
2794 
2795 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2796 {
2797 #if PetscDefined(USE_COMPLEX)
2798   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2799   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2800   MatScalar   *aa = a->a;
2801 
2802   PetscFunctionBegin;
2803   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
2804   PetscFunctionReturn(PETSC_SUCCESS);
2805 #else
2806   (void)A;
2807   return PETSC_SUCCESS;
2808 #endif
2809 }
2810 
2811 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2812 {
2813 #if PetscDefined(USE_COMPLEX)
2814   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2815   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2816   MatScalar   *aa = a->a;
2817 
2818   PetscFunctionBegin;
2819   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
2820   PetscFunctionReturn(PETSC_SUCCESS);
2821 #else
2822   (void)A;
2823   return PETSC_SUCCESS;
2824 #endif
2825 }
2826 
2827 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2828 {
2829 #if PetscDefined(USE_COMPLEX)
2830   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2831   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2832   MatScalar   *aa = a->a;
2833 
2834   PetscFunctionBegin;
2835   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2836   PetscFunctionReturn(PETSC_SUCCESS);
2837 #else
2838   (void)A;
2839   return PETSC_SUCCESS;
2840 #endif
2841 }
2842 
2843 /*
2844     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2845 */
2846 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2847 {
2848   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
2849   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
2850   PetscInt     nz = a->i[m], row, *jj, mr, col;
2851 
2852   PetscFunctionBegin;
2853   *nn = n;
2854   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2855   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
2856   PetscCall(PetscCalloc1(n, &collengths));
2857   PetscCall(PetscMalloc1(n + 1, &cia));
2858   PetscCall(PetscMalloc1(nz, &cja));
2859   jj = a->j;
2860   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2861   cia[0] = oshift;
2862   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2863   PetscCall(PetscArrayzero(collengths, n));
2864   jj = a->j;
2865   for (row = 0; row < m; row++) {
2866     mr = a->i[row + 1] - a->i[row];
2867     for (i = 0; i < mr; i++) {
2868       col = *jj++;
2869 
2870       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2871     }
2872   }
2873   PetscCall(PetscFree(collengths));
2874   *ia = cia;
2875   *ja = cja;
2876   PetscFunctionReturn(PETSC_SUCCESS);
2877 }
2878 
2879 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2880 {
2881   PetscFunctionBegin;
2882   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2883   PetscCall(PetscFree(*ia));
2884   PetscCall(PetscFree(*ja));
2885   PetscFunctionReturn(PETSC_SUCCESS);
2886 }
2887 
2888 /*
2889  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2890  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2891  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2892  */
2893 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2894 {
2895   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2896   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2897   PetscInt     nz = a->i[m], row, *jj, mr, col;
2898   PetscInt    *cspidx;
2899 
2900   PetscFunctionBegin;
2901   *nn = n;
2902   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2903 
2904   PetscCall(PetscCalloc1(n, &collengths));
2905   PetscCall(PetscMalloc1(n + 1, &cia));
2906   PetscCall(PetscMalloc1(nz, &cja));
2907   PetscCall(PetscMalloc1(nz, &cspidx));
2908   jj = a->j;
2909   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2910   cia[0] = oshift;
2911   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2912   PetscCall(PetscArrayzero(collengths, n));
2913   jj = a->j;
2914   for (row = 0; row < m; row++) {
2915     mr = a->i[row + 1] - a->i[row];
2916     for (i = 0; i < mr; i++) {
2917       col                                         = *jj++;
2918       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2919       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2920     }
2921   }
2922   PetscCall(PetscFree(collengths));
2923   *ia    = cia;
2924   *ja    = cja;
2925   *spidx = cspidx;
2926   PetscFunctionReturn(PETSC_SUCCESS);
2927 }
2928 
2929 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2930 {
2931   PetscFunctionBegin;
2932   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
2933   PetscCall(PetscFree(*spidx));
2934   PetscFunctionReturn(PETSC_SUCCESS);
2935 }
2936 
2937 static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2938 {
2939   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
2940 
2941   PetscFunctionBegin;
2942   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
2943   PetscCall(MatShift_Basic(Y, a));
2944   PetscFunctionReturn(PETSC_SUCCESS);
2945 }
2946 
2947 PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep)
2948 {
2949   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2950   PetscInt     fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k;
2951   PetscInt     m = A->rmap->N, *ailen = a->ilen;
2952   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2953   MatScalar   *aa = a->a, *ap;
2954   PetscBool    zero;
2955 
2956   PetscFunctionBegin;
2957   PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix");
2958   if (m) rmax = ailen[0];
2959   for (i = 1; i <= mbs; i++) {
2960     for (k = ai[i - 1]; k < ai[i]; k++) {
2961       zero = PETSC_TRUE;
2962       ap   = aa + bs2 * k;
2963       for (j = 0; j < bs2 && zero; j++) {
2964         if (ap[j] != 0.0) zero = PETSC_FALSE;
2965       }
2966       if (zero && (aj[k] != i - 1 || !keep)) fshift++;
2967       else {
2968         if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1));
2969         aj[k - fshift] = aj[k];
2970         PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2));
2971       }
2972     }
2973     ai[i - 1] -= fshift_prev;
2974     fshift_prev  = fshift;
2975     ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1];
2976     a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0);
2977     rmax = PetscMax(rmax, ailen[i - 1]);
2978   }
2979   if (fshift) {
2980     if (mbs) {
2981       ai[mbs] -= fshift;
2982       a->nz = ai[mbs];
2983     }
2984     PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz));
2985     A->nonzerostate++;
2986     A->info.nz_unneeded += (PetscReal)fshift;
2987     a->rmax = rmax;
2988     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
2989     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
2990   }
2991   PetscFunctionReturn(PETSC_SUCCESS);
2992 }
2993 
2994 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2995                                        MatGetRow_SeqBAIJ,
2996                                        MatRestoreRow_SeqBAIJ,
2997                                        MatMult_SeqBAIJ_N,
2998                                        /* 4*/ MatMultAdd_SeqBAIJ_N,
2999                                        MatMultTranspose_SeqBAIJ,
3000                                        MatMultTransposeAdd_SeqBAIJ,
3001                                        NULL,
3002                                        NULL,
3003                                        NULL,
3004                                        /* 10*/ NULL,
3005                                        MatLUFactor_SeqBAIJ,
3006                                        NULL,
3007                                        NULL,
3008                                        MatTranspose_SeqBAIJ,
3009                                        /* 15*/ MatGetInfo_SeqBAIJ,
3010                                        MatEqual_SeqBAIJ,
3011                                        MatGetDiagonal_SeqBAIJ,
3012                                        MatDiagonalScale_SeqBAIJ,
3013                                        MatNorm_SeqBAIJ,
3014                                        /* 20*/ NULL,
3015                                        MatAssemblyEnd_SeqBAIJ,
3016                                        MatSetOption_SeqBAIJ,
3017                                        MatZeroEntries_SeqBAIJ,
3018                                        /* 24*/ MatZeroRows_SeqBAIJ,
3019                                        NULL,
3020                                        NULL,
3021                                        NULL,
3022                                        NULL,
3023                                        /* 29*/ MatSetUp_Seq_Hash,
3024                                        NULL,
3025                                        NULL,
3026                                        NULL,
3027                                        NULL,
3028                                        /* 34*/ MatDuplicate_SeqBAIJ,
3029                                        NULL,
3030                                        NULL,
3031                                        MatILUFactor_SeqBAIJ,
3032                                        NULL,
3033                                        /* 39*/ MatAXPY_SeqBAIJ,
3034                                        MatCreateSubMatrices_SeqBAIJ,
3035                                        MatIncreaseOverlap_SeqBAIJ,
3036                                        MatGetValues_SeqBAIJ,
3037                                        MatCopy_SeqBAIJ,
3038                                        /* 44*/ NULL,
3039                                        MatScale_SeqBAIJ,
3040                                        MatShift_SeqBAIJ,
3041                                        NULL,
3042                                        MatZeroRowsColumns_SeqBAIJ,
3043                                        /* 49*/ NULL,
3044                                        MatGetRowIJ_SeqBAIJ,
3045                                        MatRestoreRowIJ_SeqBAIJ,
3046                                        MatGetColumnIJ_SeqBAIJ,
3047                                        MatRestoreColumnIJ_SeqBAIJ,
3048                                        /* 54*/ MatFDColoringCreate_SeqXAIJ,
3049                                        NULL,
3050                                        NULL,
3051                                        NULL,
3052                                        MatSetValuesBlocked_SeqBAIJ,
3053                                        /* 59*/ MatCreateSubMatrix_SeqBAIJ,
3054                                        MatDestroy_SeqBAIJ,
3055                                        MatView_SeqBAIJ,
3056                                        NULL,
3057                                        NULL,
3058                                        /* 64*/ NULL,
3059                                        NULL,
3060                                        NULL,
3061                                        NULL,
3062                                        MatGetRowMaxAbs_SeqBAIJ,
3063                                        /* 69*/ NULL,
3064                                        MatConvert_Basic,
3065                                        NULL,
3066                                        MatFDColoringApply_BAIJ,
3067                                        NULL,
3068                                        /* 74*/ NULL,
3069                                        NULL,
3070                                        NULL,
3071                                        NULL,
3072                                        MatLoad_SeqBAIJ,
3073                                        /* 79*/ NULL,
3074                                        NULL,
3075                                        NULL,
3076                                        NULL,
3077                                        NULL,
3078                                        /* 84*/ NULL,
3079                                        NULL,
3080                                        NULL,
3081                                        NULL,
3082                                        NULL,
3083                                        /* 89*/ NULL,
3084                                        NULL,
3085                                        NULL,
3086                                        NULL,
3087                                        MatConjugate_SeqBAIJ,
3088                                        /* 94*/ NULL,
3089                                        NULL,
3090                                        MatRealPart_SeqBAIJ,
3091                                        MatImaginaryPart_SeqBAIJ,
3092                                        NULL,
3093                                        /* 99*/ NULL,
3094                                        NULL,
3095                                        NULL,
3096                                        NULL,
3097                                        NULL,
3098                                        /*104*/ MatMissingDiagonal_SeqBAIJ,
3099                                        NULL,
3100                                        NULL,
3101                                        NULL,
3102                                        NULL,
3103                                        /*109*/ NULL,
3104                                        NULL,
3105                                        NULL,
3106                                        MatMultHermitianTranspose_SeqBAIJ,
3107                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3108                                        /*114*/ NULL,
3109                                        NULL,
3110                                        MatGetColumnReductions_SeqBAIJ,
3111                                        MatInvertBlockDiagonal_SeqBAIJ,
3112                                        NULL,
3113                                        /*119*/ NULL,
3114                                        NULL,
3115                                        NULL,
3116                                        NULL,
3117                                        NULL,
3118                                        /*124*/ NULL,
3119                                        NULL,
3120                                        NULL,
3121                                        MatSetBlockSizes_Default,
3122                                        NULL,
3123                                        /*129*/ MatFDColoringSetUp_SeqXAIJ,
3124                                        NULL,
3125                                        MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3126                                        MatDestroySubMatrices_SeqBAIJ,
3127                                        NULL,
3128                                        /*134*/ NULL,
3129                                        NULL,
3130                                        NULL,
3131                                        MatEliminateZeros_SeqBAIJ,
3132                                        MatGetRowSumAbs_SeqBAIJ,
3133                                        /*139*/ NULL,
3134                                        NULL,
3135                                        NULL,
3136                                        MatCopyHashToXAIJ_Seq_Hash,
3137                                        NULL,
3138                                        NULL};
3139 
3140 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3141 {
3142   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3143   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3144 
3145   PetscFunctionBegin;
3146   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3147 
3148   /* allocate space for values if not already there */
3149   if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
3150 
3151   /* copy values over */
3152   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
3153   PetscFunctionReturn(PETSC_SUCCESS);
3154 }
3155 
3156 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3157 {
3158   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3159   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3160 
3161   PetscFunctionBegin;
3162   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3163   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
3164 
3165   /* copy values over */
3166   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
3167   PetscFunctionReturn(PETSC_SUCCESS);
3168 }
3169 
3170 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3171 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3172 
3173 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3174 {
3175   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
3176   PetscInt     i, mbs, nbs, bs2;
3177   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3178 
3179   PetscFunctionBegin;
3180   if (B->hash_active) {
3181     PetscInt bs;
3182     B->ops[0] = b->cops;
3183     PetscCall(PetscHMapIJVDestroy(&b->ht));
3184     PetscCall(MatGetBlockSize(B, &bs));
3185     if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht));
3186     PetscCall(PetscFree(b->dnz));
3187     PetscCall(PetscFree(b->bdnz));
3188     B->hash_active = PETSC_FALSE;
3189   }
3190   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3191   if (nz == MAT_SKIP_ALLOCATION) {
3192     skipallocation = PETSC_TRUE;
3193     nz             = 0;
3194   }
3195 
3196   PetscCall(MatSetBlockSize(B, bs));
3197   PetscCall(PetscLayoutSetUp(B->rmap));
3198   PetscCall(PetscLayoutSetUp(B->cmap));
3199   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3200 
3201   B->preallocated = PETSC_TRUE;
3202 
3203   mbs = B->rmap->n / bs;
3204   nbs = B->cmap->n / bs;
3205   bs2 = bs * bs;
3206 
3207   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3208 
3209   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3210   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3211   if (nnz) {
3212     for (i = 0; i < mbs; i++) {
3213       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
3214       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3215     }
3216   }
3217 
3218   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
3219   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3220   PetscOptionsEnd();
3221 
3222   if (!flg) {
3223     switch (bs) {
3224     case 1:
3225       B->ops->mult    = MatMult_SeqBAIJ_1;
3226       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3227       break;
3228     case 2:
3229       B->ops->mult    = MatMult_SeqBAIJ_2;
3230       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3231       break;
3232     case 3:
3233       B->ops->mult    = MatMult_SeqBAIJ_3;
3234       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3235       break;
3236     case 4:
3237       B->ops->mult    = MatMult_SeqBAIJ_4;
3238       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3239       break;
3240     case 5:
3241       B->ops->mult    = MatMult_SeqBAIJ_5;
3242       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3243       break;
3244     case 6:
3245       B->ops->mult    = MatMult_SeqBAIJ_6;
3246       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3247       break;
3248     case 7:
3249       B->ops->mult    = MatMult_SeqBAIJ_7;
3250       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3251       break;
3252     case 9: {
3253       PetscInt version = 1;
3254       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3255       switch (version) {
3256 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3257       case 1:
3258         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
3259         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3260         PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3261         break;
3262 #endif
3263       default:
3264         B->ops->mult    = MatMult_SeqBAIJ_N;
3265         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3266         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3267         break;
3268       }
3269       break;
3270     }
3271     case 11:
3272       B->ops->mult    = MatMult_SeqBAIJ_11;
3273       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3274       break;
3275     case 12: {
3276       PetscInt version = 1;
3277       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3278       switch (version) {
3279       case 1:
3280         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
3281         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3282         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3283         break;
3284       case 2:
3285         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
3286         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3287         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3288         break;
3289 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3290       case 3:
3291         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
3292         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3293         PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3294         break;
3295 #endif
3296       default:
3297         B->ops->mult    = MatMult_SeqBAIJ_N;
3298         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3299         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3300         break;
3301       }
3302       break;
3303     }
3304     case 15: {
3305       PetscInt version = 1;
3306       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3307       switch (version) {
3308       case 1:
3309         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3310         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3311         break;
3312       case 2:
3313         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3314         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3315         break;
3316       case 3:
3317         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3318         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3319         break;
3320       case 4:
3321         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3322         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3323         break;
3324       default:
3325         B->ops->mult = MatMult_SeqBAIJ_N;
3326         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3327         break;
3328       }
3329       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3330       break;
3331     }
3332     default:
3333       B->ops->mult    = MatMult_SeqBAIJ_N;
3334       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3335       PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3336       break;
3337     }
3338   }
3339   B->ops->sor = MatSOR_SeqBAIJ;
3340   b->mbs      = mbs;
3341   b->nbs      = nbs;
3342   if (!skipallocation) {
3343     if (!b->imax) {
3344       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
3345 
3346       b->free_imax_ilen = PETSC_TRUE;
3347     }
3348     /* b->ilen will count nonzeros in each block row so far. */
3349     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3350     if (!nnz) {
3351       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3352       else if (nz < 0) nz = 1;
3353       nz = PetscMin(nz, nbs);
3354       for (i = 0; i < mbs; i++) b->imax[i] = nz;
3355       PetscCall(PetscIntMultError(nz, mbs, &nz));
3356     } else {
3357       PetscInt64 nz64 = 0;
3358       for (i = 0; i < mbs; i++) {
3359         b->imax[i] = nnz[i];
3360         nz64 += nnz[i];
3361       }
3362       PetscCall(PetscIntCast(nz64, &nz));
3363     }
3364 
3365     /* allocate the matrix space */
3366     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3367     PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j));
3368     PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i));
3369     if (B->structure_only) {
3370       b->free_a = PETSC_FALSE;
3371     } else {
3372       PetscInt nzbs2 = 0;
3373       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
3374       PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a));
3375       b->free_a = PETSC_TRUE;
3376       PetscCall(PetscArrayzero(b->a, nz * bs2));
3377     }
3378     b->free_ij = PETSC_TRUE;
3379     PetscCall(PetscArrayzero(b->j, nz));
3380 
3381     b->i[0] = 0;
3382     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3383   } else {
3384     b->free_a  = PETSC_FALSE;
3385     b->free_ij = PETSC_FALSE;
3386   }
3387 
3388   b->bs2              = bs2;
3389   b->mbs              = mbs;
3390   b->nz               = 0;
3391   b->maxnz            = nz;
3392   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3393   B->was_assembled    = PETSC_FALSE;
3394   B->assembled        = PETSC_FALSE;
3395   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3396   PetscFunctionReturn(PETSC_SUCCESS);
3397 }
3398 
3399 static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3400 {
3401   PetscInt     i, m, nz, nz_max = 0, *nnz;
3402   PetscScalar *values      = NULL;
3403   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3404 
3405   PetscFunctionBegin;
3406   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
3407   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
3408   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
3409   PetscCall(PetscLayoutSetUp(B->rmap));
3410   PetscCall(PetscLayoutSetUp(B->cmap));
3411   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3412   m = B->rmap->n / bs;
3413 
3414   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
3415   PetscCall(PetscMalloc1(m + 1, &nnz));
3416   for (i = 0; i < m; i++) {
3417     nz = ii[i + 1] - ii[i];
3418     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3419     nz_max = PetscMax(nz_max, nz);
3420     nnz[i] = nz;
3421   }
3422   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
3423   PetscCall(PetscFree(nnz));
3424 
3425   values = (PetscScalar *)V;
3426   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3427   for (i = 0; i < m; i++) {
3428     PetscInt        ncols = ii[i + 1] - ii[i];
3429     const PetscInt *icols = jj + ii[i];
3430     if (bs == 1 || !roworiented) {
3431       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
3432       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
3433     } else {
3434       PetscInt j;
3435       for (j = 0; j < ncols; j++) {
3436         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
3437         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
3438       }
3439     }
3440   }
3441   if (!V) PetscCall(PetscFree(values));
3442   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3443   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3444   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3445   PetscFunctionReturn(PETSC_SUCCESS);
3446 }
3447 
3448 /*@C
3449   MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3450 
3451   Not Collective
3452 
3453   Input Parameter:
3454 . A - a `MATSEQBAIJ` matrix
3455 
3456   Output Parameter:
3457 . array - pointer to the data
3458 
3459   Level: intermediate
3460 
3461 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3462 @*/
3463 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[])
3464 {
3465   PetscFunctionBegin;
3466   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
3467   PetscFunctionReturn(PETSC_SUCCESS);
3468 }
3469 
3470 /*@C
3471   MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3472 
3473   Not Collective
3474 
3475   Input Parameters:
3476 + A     - a `MATSEQBAIJ` matrix
3477 - array - pointer to the data
3478 
3479   Level: intermediate
3480 
3481 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3482 @*/
3483 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[])
3484 {
3485   PetscFunctionBegin;
3486   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
3487   PetscFunctionReturn(PETSC_SUCCESS);
3488 }
3489 
3490 /*MC
3491    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3492    block sparse compressed row format.
3493 
3494    Options Database Keys:
3495 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()`
3496 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3497 
3498    Level: beginner
3499 
3500    Notes:
3501    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
3502    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
3503 
3504    Run with `-info` to see what version of the matrix-vector product is being used
3505 
3506 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()`
3507 M*/
3508 
3509 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3510 
3511 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3512 {
3513   PetscMPIInt  size;
3514   Mat_SeqBAIJ *b;
3515 
3516   PetscFunctionBegin;
3517   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
3518   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3519 
3520   PetscCall(PetscNew(&b));
3521   B->data   = (void *)b;
3522   B->ops[0] = MatOps_Values;
3523 
3524   b->row          = NULL;
3525   b->col          = NULL;
3526   b->icol         = NULL;
3527   b->reallocs     = 0;
3528   b->saved_values = NULL;
3529 
3530   b->roworiented        = PETSC_TRUE;
3531   b->nonew              = 0;
3532   b->diag               = NULL;
3533   B->spptr              = NULL;
3534   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3535   b->keepnonzeropattern = PETSC_FALSE;
3536 
3537   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
3538   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
3539   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
3540   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
3541   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
3542   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
3543   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
3544   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
3545   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3546   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
3547 #if defined(PETSC_HAVE_HYPRE)
3548   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
3549 #endif
3550   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
3551   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
3552   PetscFunctionReturn(PETSC_SUCCESS);
3553 }
3554 
3555 PETSC_INTERN PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3556 {
3557   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3558   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3559 
3560   PetscFunctionBegin;
3561   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
3562   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
3563 
3564   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3565     c->imax           = a->imax;
3566     c->ilen           = a->ilen;
3567     c->free_imax_ilen = PETSC_FALSE;
3568   } else {
3569     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3570     for (i = 0; i < mbs; i++) {
3571       c->imax[i] = a->imax[i];
3572       c->ilen[i] = a->ilen[i];
3573     }
3574     c->free_imax_ilen = PETSC_TRUE;
3575   }
3576 
3577   /* allocate the matrix space */
3578   if (mallocmatspace) {
3579     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3580       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3581       PetscCall(PetscArrayzero(c->a, bs2 * nz));
3582       c->free_a       = PETSC_TRUE;
3583       c->i            = a->i;
3584       c->j            = a->j;
3585       c->free_ij      = PETSC_FALSE;
3586       c->parent       = A;
3587       C->preallocated = PETSC_TRUE;
3588       C->assembled    = PETSC_TRUE;
3589 
3590       PetscCall(PetscObjectReference((PetscObject)A));
3591       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3592       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3593     } else {
3594       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3595       PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j));
3596       PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i));
3597       c->free_a  = PETSC_TRUE;
3598       c->free_ij = PETSC_TRUE;
3599 
3600       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3601       if (mbs > 0) {
3602         PetscCall(PetscArraycpy(c->j, a->j, nz));
3603         if (cpvalues == MAT_COPY_VALUES) {
3604           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
3605         } else {
3606           PetscCall(PetscArrayzero(c->a, bs2 * nz));
3607         }
3608       }
3609       C->preallocated = PETSC_TRUE;
3610       C->assembled    = PETSC_TRUE;
3611     }
3612   }
3613 
3614   c->roworiented = a->roworiented;
3615   c->nonew       = a->nonew;
3616 
3617   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
3618   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
3619 
3620   c->bs2 = a->bs2;
3621   c->mbs = a->mbs;
3622   c->nbs = a->nbs;
3623 
3624   if (a->diag) {
3625     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3626       c->diag      = a->diag;
3627       c->free_diag = PETSC_FALSE;
3628     } else {
3629       PetscCall(PetscMalloc1(mbs + 1, &c->diag));
3630       for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i];
3631       c->free_diag = PETSC_TRUE;
3632     }
3633   } else c->diag = NULL;
3634 
3635   c->nz         = a->nz;
3636   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3637   c->solve_work = NULL;
3638   c->mult_work  = NULL;
3639   c->sor_workt  = NULL;
3640   c->sor_work   = NULL;
3641 
3642   c->compressedrow.use   = a->compressedrow.use;
3643   c->compressedrow.nrows = a->compressedrow.nrows;
3644   if (a->compressedrow.use) {
3645     i = a->compressedrow.nrows;
3646     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
3647     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
3648     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
3649   } else {
3650     c->compressedrow.use    = PETSC_FALSE;
3651     c->compressedrow.i      = NULL;
3652     c->compressedrow.rindex = NULL;
3653   }
3654   c->nonzerorowcnt = a->nonzerorowcnt;
3655   C->nonzerostate  = A->nonzerostate;
3656 
3657   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
3658   PetscFunctionReturn(PETSC_SUCCESS);
3659 }
3660 
3661 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3662 {
3663   PetscFunctionBegin;
3664   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
3665   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
3666   PetscCall(MatSetType(*B, MATSEQBAIJ));
3667   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
3668   PetscFunctionReturn(PETSC_SUCCESS);
3669 }
3670 
3671 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3672 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3673 {
3674   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3675   PetscInt    *rowidxs, *colidxs;
3676   PetscScalar *matvals;
3677 
3678   PetscFunctionBegin;
3679   PetscCall(PetscViewerSetUp(viewer));
3680 
3681   /* read matrix header */
3682   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3683   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3684   M  = header[1];
3685   N  = header[2];
3686   nz = header[3];
3687   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3688   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3689   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3690 
3691   /* set block sizes from the viewer's .info file */
3692   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3693   /* set local and global sizes if not set already */
3694   if (mat->rmap->n < 0) mat->rmap->n = M;
3695   if (mat->cmap->n < 0) mat->cmap->n = N;
3696   if (mat->rmap->N < 0) mat->rmap->N = M;
3697   if (mat->cmap->N < 0) mat->cmap->N = N;
3698   PetscCall(PetscLayoutSetUp(mat->rmap));
3699   PetscCall(PetscLayoutSetUp(mat->cmap));
3700 
3701   /* check if the matrix sizes are correct */
3702   PetscCall(MatGetSize(mat, &rows, &cols));
3703   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3704   PetscCall(MatGetBlockSize(mat, &bs));
3705   PetscCall(MatGetLocalSize(mat, &m, &n));
3706   mbs = m / bs;
3707   nbs = n / bs;
3708 
3709   /* read in row lengths, column indices and nonzero values */
3710   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3711   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
3712   rowidxs[0] = 0;
3713   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3714   sum = rowidxs[m];
3715   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3716 
3717   /* read in column indices and nonzero values */
3718   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
3719   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
3720   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3721 
3722   {               /* preallocate matrix storage */
3723     PetscBT   bt; /* helper bit set to count nonzeros */
3724     PetscInt *nnz;
3725     PetscBool sbaij;
3726 
3727     PetscCall(PetscBTCreate(nbs, &bt));
3728     PetscCall(PetscCalloc1(mbs, &nnz));
3729     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3730     for (i = 0; i < mbs; i++) {
3731       PetscCall(PetscBTMemzero(nbs, bt));
3732       for (k = 0; k < bs; k++) {
3733         PetscInt row = bs * i + k;
3734         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3735           PetscInt col = colidxs[j];
3736           if (!sbaij || col >= row)
3737             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3738         }
3739       }
3740     }
3741     PetscCall(PetscBTDestroy(&bt));
3742     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
3743     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
3744     PetscCall(PetscFree(nnz));
3745   }
3746 
3747   /* store matrix values */
3748   for (i = 0; i < m; i++) {
3749     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
3750     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3751   }
3752 
3753   PetscCall(PetscFree(rowidxs));
3754   PetscCall(PetscFree2(colidxs, matvals));
3755   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3756   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3757   PetscFunctionReturn(PETSC_SUCCESS);
3758 }
3759 
3760 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3761 {
3762   PetscBool isbinary;
3763 
3764   PetscFunctionBegin;
3765   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3766   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3767   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
3768   PetscFunctionReturn(PETSC_SUCCESS);
3769 }
3770 
3771 /*@
3772   MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3773   compressed row) format.  For good matrix assembly performance the
3774   user should preallocate the matrix storage by setting the parameter `nz`
3775   (or the array `nnz`).
3776 
3777   Collective
3778 
3779   Input Parameters:
3780 + comm - MPI communicator, set to `PETSC_COMM_SELF`
3781 . bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3782          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3783 . m    - number of rows
3784 . n    - number of columns
3785 . nz   - number of nonzero blocks  per block row (same for all rows)
3786 - nnz  - array containing the number of nonzero blocks in the various block rows
3787          (possibly different for each block row) or `NULL`
3788 
3789   Output Parameter:
3790 . A - the matrix
3791 
3792   Options Database Keys:
3793 + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3794 - -mat_block_size - size of the blocks to use
3795 
3796   Level: intermediate
3797 
3798   Notes:
3799   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3800   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3801   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
3802 
3803   The number of rows and columns must be divisible by blocksize.
3804 
3805   If the `nnz` parameter is given then the `nz` parameter is ignored
3806 
3807   A nonzero block is any block that as 1 or more nonzeros in it
3808 
3809   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3810   storage.  That is, the stored row and column indices can begin at
3811   either one (as in Fortran) or zero.
3812 
3813   Specify the preallocated storage with either `nz` or `nnz` (not both).
3814   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3815   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3816   matrices.
3817 
3818 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3819 @*/
3820 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3821 {
3822   PetscFunctionBegin;
3823   PetscCall(MatCreate(comm, A));
3824   PetscCall(MatSetSizes(*A, m, n, m, n));
3825   PetscCall(MatSetType(*A, MATSEQBAIJ));
3826   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
3827   PetscFunctionReturn(PETSC_SUCCESS);
3828 }
3829 
3830 /*@
3831   MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3832   per row in the matrix. For good matrix assembly performance the
3833   user should preallocate the matrix storage by setting the parameter `nz`
3834   (or the array `nnz`).
3835 
3836   Collective
3837 
3838   Input Parameters:
3839 + B   - the matrix
3840 . bs  - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3841         blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3842 . nz  - number of block nonzeros per block row (same for all rows)
3843 - nnz - array containing the number of block nonzeros in the various block rows
3844         (possibly different for each block row) or `NULL`
3845 
3846   Options Database Keys:
3847 + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3848 - -mat_block_size - size of the blocks to use
3849 
3850   Level: intermediate
3851 
3852   Notes:
3853   If the `nnz` parameter is given then the `nz` parameter is ignored
3854 
3855   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3856   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3857   You can also run with the option `-info` and look for messages with the string
3858   malloc in them to see if additional memory allocation was needed.
3859 
3860   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3861   storage.  That is, the stored row and column indices can begin at
3862   either one (as in Fortran) or zero.
3863 
3864   Specify the preallocated storage with either `nz` or `nnz` (not both).
3865   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3866   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3867 
3868 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3869 @*/
3870 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3871 {
3872   PetscFunctionBegin;
3873   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3874   PetscValidType(B, 1);
3875   PetscValidLogicalCollectiveInt(B, bs, 2);
3876   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
3877   PetscFunctionReturn(PETSC_SUCCESS);
3878 }
3879 
3880 /*@C
3881   MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3882 
3883   Collective
3884 
3885   Input Parameters:
3886 + B  - the matrix
3887 . bs - the blocksize
3888 . i  - the indices into `j` for the start of each local row (indices start with zero)
3889 . j  - the column indices for each local row (indices start with zero) these must be sorted for each row
3890 - v  - optional values in the matrix, use `NULL` if not provided
3891 
3892   Level: advanced
3893 
3894   Notes:
3895   The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()`
3896 
3897   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
3898   may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
3899   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3900   `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3901   block column and the second index is over columns within a block.
3902 
3903   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3904 
3905 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3906 @*/
3907 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3908 {
3909   PetscFunctionBegin;
3910   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3911   PetscValidType(B, 1);
3912   PetscValidLogicalCollectiveInt(B, bs, 2);
3913   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
3914   PetscFunctionReturn(PETSC_SUCCESS);
3915 }
3916 
3917 /*@
3918   MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3919 
3920   Collective
3921 
3922   Input Parameters:
3923 + comm - must be an MPI communicator of size 1
3924 . bs   - size of block
3925 . m    - number of rows
3926 . n    - number of columns
3927 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3928 . j    - column indices
3929 - a    - matrix values
3930 
3931   Output Parameter:
3932 . mat - the matrix
3933 
3934   Level: advanced
3935 
3936   Notes:
3937   The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
3938   once the matrix is destroyed
3939 
3940   You cannot set new nonzero locations into this matrix, that will generate an error.
3941 
3942   The `i` and `j` indices are 0 based
3943 
3944   When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3945 
3946   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3947   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3948   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3949   with column-major ordering within blocks.
3950 
3951 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3952 @*/
3953 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3954 {
3955   Mat_SeqBAIJ *baij;
3956 
3957   PetscFunctionBegin;
3958   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
3959   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3960 
3961   PetscCall(MatCreate(comm, mat));
3962   PetscCall(MatSetSizes(*mat, m, n, m, n));
3963   PetscCall(MatSetType(*mat, MATSEQBAIJ));
3964   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3965   baij = (Mat_SeqBAIJ *)(*mat)->data;
3966   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3967 
3968   baij->i = i;
3969   baij->j = j;
3970   baij->a = a;
3971 
3972   baij->nonew          = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3973   baij->free_a         = PETSC_FALSE;
3974   baij->free_ij        = PETSC_FALSE;
3975   baij->free_imax_ilen = PETSC_TRUE;
3976 
3977   for (PetscInt ii = 0; ii < m; ii++) {
3978     const PetscInt row_len = i[ii + 1] - i[ii];
3979 
3980     baij->ilen[ii] = baij->imax[ii] = row_len;
3981     PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
3982   }
3983   if (PetscDefined(USE_DEBUG)) {
3984     for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
3985       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3986       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3987     }
3988   }
3989 
3990   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3991   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3992   PetscFunctionReturn(PETSC_SUCCESS);
3993 }
3994 
3995 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3996 {
3997   PetscFunctionBegin;
3998   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
3999   PetscFunctionReturn(PETSC_SUCCESS);
4000 }
4001