xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 8b5d2d36b1bd7331337e6600e2fff187f080efc8)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
19 
20 PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
21 {
22   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
23   PetscInt     m, n, i;
24   PetscInt     ib, jb, bs = A->rmap->bs;
25   MatScalar   *a_val = a_aij->a;
26 
27   PetscFunctionBegin;
28   PetscCall(MatGetSize(A, &m, &n));
29   for (i = 0; i < n; i++) reductions[i] = 0.0;
30   if (type == NORM_2) {
31     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
32       for (jb = 0; jb < bs; jb++) {
33         for (ib = 0; ib < bs; ib++) {
34           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
35           a_val++;
36         }
37       }
38     }
39   } else if (type == NORM_1) {
40     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
41       for (jb = 0; jb < bs; jb++) {
42         for (ib = 0; ib < bs; ib++) {
43           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
44           a_val++;
45         }
46       }
47     }
48   } else if (type == NORM_INFINITY) {
49     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
50       for (jb = 0; jb < bs; jb++) {
51         for (ib = 0; ib < bs; ib++) {
52           int col         = A->cmap->rstart + a_aij->j[i] * bs + jb;
53           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
54           a_val++;
55         }
56       }
57     }
58   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
59     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
60       for (jb = 0; jb < bs; jb++) {
61         for (ib = 0; ib < bs; ib++) {
62           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
63           a_val++;
64         }
65       }
66     }
67   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
68     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
69       for (jb = 0; jb < bs; jb++) {
70         for (ib = 0; ib < bs; ib++) {
71           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
72           a_val++;
73         }
74       }
75     }
76   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
77   if (type == NORM_2) {
78     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
79   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
80     for (i = 0; i < n; i++) reductions[i] /= m;
81   }
82   PetscFunctionReturn(0);
83 }
84 
85 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
86 {
87   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
88   PetscInt    *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
89   MatScalar   *v     = a->a, *odiag, *diag, work[25], *v_work;
90   PetscReal    shift = 0.0;
91   PetscBool    allowzeropivot, zeropivotdetected = PETSC_FALSE;
92 
93   PetscFunctionBegin;
94   allowzeropivot = PetscNot(A->erroriffailure);
95 
96   if (a->idiagvalid) {
97     if (values) *values = a->idiag;
98     PetscFunctionReturn(0);
99   }
100   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
101   diag_offset = a->diag;
102   if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); }
103   diag = a->idiag;
104   if (values) *values = a->idiag;
105   /* factor and invert each block */
106   switch (bs) {
107   case 1:
108     for (i = 0; i < mbs; i++) {
109       odiag   = v + 1 * diag_offset[i];
110       diag[0] = odiag[0];
111 
112       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
113         if (allowzeropivot) {
114           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
115           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
116           A->factorerror_zeropivot_row   = i;
117           PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
118         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
119       }
120 
121       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
122       diag += 1;
123     }
124     break;
125   case 2:
126     for (i = 0; i < mbs; i++) {
127       odiag   = v + 4 * diag_offset[i];
128       diag[0] = odiag[0];
129       diag[1] = odiag[1];
130       diag[2] = odiag[2];
131       diag[3] = odiag[3];
132       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
133       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
134       diag += 4;
135     }
136     break;
137   case 3:
138     for (i = 0; i < mbs; i++) {
139       odiag   = v + 9 * diag_offset[i];
140       diag[0] = odiag[0];
141       diag[1] = odiag[1];
142       diag[2] = odiag[2];
143       diag[3] = odiag[3];
144       diag[4] = odiag[4];
145       diag[5] = odiag[5];
146       diag[6] = odiag[6];
147       diag[7] = odiag[7];
148       diag[8] = odiag[8];
149       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
150       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
151       diag += 9;
152     }
153     break;
154   case 4:
155     for (i = 0; i < mbs; i++) {
156       odiag = v + 16 * diag_offset[i];
157       PetscCall(PetscArraycpy(diag, odiag, 16));
158       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
159       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
160       diag += 16;
161     }
162     break;
163   case 5:
164     for (i = 0; i < mbs; i++) {
165       odiag = v + 25 * diag_offset[i];
166       PetscCall(PetscArraycpy(diag, odiag, 25));
167       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
168       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
169       diag += 25;
170     }
171     break;
172   case 6:
173     for (i = 0; i < mbs; i++) {
174       odiag = v + 36 * diag_offset[i];
175       PetscCall(PetscArraycpy(diag, odiag, 36));
176       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
177       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
178       diag += 36;
179     }
180     break;
181   case 7:
182     for (i = 0; i < mbs; i++) {
183       odiag = v + 49 * diag_offset[i];
184       PetscCall(PetscArraycpy(diag, odiag, 49));
185       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
186       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
187       diag += 49;
188     }
189     break;
190   default:
191     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
192     for (i = 0; i < mbs; i++) {
193       odiag = v + bs2 * diag_offset[i];
194       PetscCall(PetscArraycpy(diag, odiag, bs2));
195       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
196       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
197       diag += bs2;
198     }
199     PetscCall(PetscFree2(v_work, v_pivots));
200   }
201   a->idiagvalid = PETSC_TRUE;
202   PetscFunctionReturn(0);
203 }
204 
205 PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
206 {
207   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
208   PetscScalar       *x, *work, *w, *workt, *t;
209   const MatScalar   *v, *aa = a->a, *idiag;
210   const PetscScalar *b, *xb;
211   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
212   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
213   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
214 
215   PetscFunctionBegin;
216   its = its * lits;
217   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
218   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
219   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
220   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
221   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
222 
223   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
224 
225   if (!m) PetscFunctionReturn(0);
226   diag  = a->diag;
227   idiag = a->idiag;
228   k     = PetscMax(A->rmap->n, A->cmap->n);
229   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
230   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
231   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
232   work = a->mult_work;
233   t    = a->sor_workt;
234   w    = a->sor_work;
235 
236   PetscCall(VecGetArray(xx, &x));
237   PetscCall(VecGetArrayRead(bb, &b));
238 
239   if (flag & SOR_ZERO_INITIAL_GUESS) {
240     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
241       switch (bs) {
242       case 1:
243         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
244         t[0] = b[0];
245         i2   = 1;
246         idiag += 1;
247         for (i = 1; i < m; i++) {
248           v    = aa + ai[i];
249           vi   = aj + ai[i];
250           nz   = diag[i] - ai[i];
251           s[0] = b[i2];
252           for (j = 0; j < nz; j++) {
253             xw[0] = x[vi[j]];
254             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
255           }
256           t[i2] = s[0];
257           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
258           x[i2] = xw[0];
259           idiag += 1;
260           i2 += 1;
261         }
262         break;
263       case 2:
264         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
265         t[0] = b[0];
266         t[1] = b[1];
267         i2   = 2;
268         idiag += 4;
269         for (i = 1; i < m; i++) {
270           v    = aa + 4 * ai[i];
271           vi   = aj + ai[i];
272           nz   = diag[i] - ai[i];
273           s[0] = b[i2];
274           s[1] = b[i2 + 1];
275           for (j = 0; j < nz; j++) {
276             idx   = 2 * vi[j];
277             it    = 4 * j;
278             xw[0] = x[idx];
279             xw[1] = x[1 + idx];
280             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
281           }
282           t[i2]     = s[0];
283           t[i2 + 1] = s[1];
284           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
285           x[i2]     = xw[0];
286           x[i2 + 1] = xw[1];
287           idiag += 4;
288           i2 += 2;
289         }
290         break;
291       case 3:
292         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
293         t[0] = b[0];
294         t[1] = b[1];
295         t[2] = b[2];
296         i2   = 3;
297         idiag += 9;
298         for (i = 1; i < m; i++) {
299           v    = aa + 9 * ai[i];
300           vi   = aj + ai[i];
301           nz   = diag[i] - ai[i];
302           s[0] = b[i2];
303           s[1] = b[i2 + 1];
304           s[2] = b[i2 + 2];
305           while (nz--) {
306             idx   = 3 * (*vi++);
307             xw[0] = x[idx];
308             xw[1] = x[1 + idx];
309             xw[2] = x[2 + idx];
310             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
311             v += 9;
312           }
313           t[i2]     = s[0];
314           t[i2 + 1] = s[1];
315           t[i2 + 2] = s[2];
316           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
317           x[i2]     = xw[0];
318           x[i2 + 1] = xw[1];
319           x[i2 + 2] = xw[2];
320           idiag += 9;
321           i2 += 3;
322         }
323         break;
324       case 4:
325         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
326         t[0] = b[0];
327         t[1] = b[1];
328         t[2] = b[2];
329         t[3] = b[3];
330         i2   = 4;
331         idiag += 16;
332         for (i = 1; i < m; i++) {
333           v    = aa + 16 * ai[i];
334           vi   = aj + ai[i];
335           nz   = diag[i] - ai[i];
336           s[0] = b[i2];
337           s[1] = b[i2 + 1];
338           s[2] = b[i2 + 2];
339           s[3] = b[i2 + 3];
340           while (nz--) {
341             idx   = 4 * (*vi++);
342             xw[0] = x[idx];
343             xw[1] = x[1 + idx];
344             xw[2] = x[2 + idx];
345             xw[3] = x[3 + idx];
346             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
347             v += 16;
348           }
349           t[i2]     = s[0];
350           t[i2 + 1] = s[1];
351           t[i2 + 2] = s[2];
352           t[i2 + 3] = s[3];
353           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
354           x[i2]     = xw[0];
355           x[i2 + 1] = xw[1];
356           x[i2 + 2] = xw[2];
357           x[i2 + 3] = xw[3];
358           idiag += 16;
359           i2 += 4;
360         }
361         break;
362       case 5:
363         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
364         t[0] = b[0];
365         t[1] = b[1];
366         t[2] = b[2];
367         t[3] = b[3];
368         t[4] = b[4];
369         i2   = 5;
370         idiag += 25;
371         for (i = 1; i < m; i++) {
372           v    = aa + 25 * ai[i];
373           vi   = aj + ai[i];
374           nz   = diag[i] - ai[i];
375           s[0] = b[i2];
376           s[1] = b[i2 + 1];
377           s[2] = b[i2 + 2];
378           s[3] = b[i2 + 3];
379           s[4] = b[i2 + 4];
380           while (nz--) {
381             idx   = 5 * (*vi++);
382             xw[0] = x[idx];
383             xw[1] = x[1 + idx];
384             xw[2] = x[2 + idx];
385             xw[3] = x[3 + idx];
386             xw[4] = x[4 + idx];
387             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
388             v += 25;
389           }
390           t[i2]     = s[0];
391           t[i2 + 1] = s[1];
392           t[i2 + 2] = s[2];
393           t[i2 + 3] = s[3];
394           t[i2 + 4] = s[4];
395           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
396           x[i2]     = xw[0];
397           x[i2 + 1] = xw[1];
398           x[i2 + 2] = xw[2];
399           x[i2 + 3] = xw[3];
400           x[i2 + 4] = xw[4];
401           idiag += 25;
402           i2 += 5;
403         }
404         break;
405       case 6:
406         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
407         t[0] = b[0];
408         t[1] = b[1];
409         t[2] = b[2];
410         t[3] = b[3];
411         t[4] = b[4];
412         t[5] = b[5];
413         i2   = 6;
414         idiag += 36;
415         for (i = 1; i < m; i++) {
416           v    = aa + 36 * ai[i];
417           vi   = aj + ai[i];
418           nz   = diag[i] - ai[i];
419           s[0] = b[i2];
420           s[1] = b[i2 + 1];
421           s[2] = b[i2 + 2];
422           s[3] = b[i2 + 3];
423           s[4] = b[i2 + 4];
424           s[5] = b[i2 + 5];
425           while (nz--) {
426             idx   = 6 * (*vi++);
427             xw[0] = x[idx];
428             xw[1] = x[1 + idx];
429             xw[2] = x[2 + idx];
430             xw[3] = x[3 + idx];
431             xw[4] = x[4 + idx];
432             xw[5] = x[5 + idx];
433             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
434             v += 36;
435           }
436           t[i2]     = s[0];
437           t[i2 + 1] = s[1];
438           t[i2 + 2] = s[2];
439           t[i2 + 3] = s[3];
440           t[i2 + 4] = s[4];
441           t[i2 + 5] = s[5];
442           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
443           x[i2]     = xw[0];
444           x[i2 + 1] = xw[1];
445           x[i2 + 2] = xw[2];
446           x[i2 + 3] = xw[3];
447           x[i2 + 4] = xw[4];
448           x[i2 + 5] = xw[5];
449           idiag += 36;
450           i2 += 6;
451         }
452         break;
453       case 7:
454         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
455         t[0] = b[0];
456         t[1] = b[1];
457         t[2] = b[2];
458         t[3] = b[3];
459         t[4] = b[4];
460         t[5] = b[5];
461         t[6] = b[6];
462         i2   = 7;
463         idiag += 49;
464         for (i = 1; i < m; i++) {
465           v    = aa + 49 * ai[i];
466           vi   = aj + ai[i];
467           nz   = diag[i] - ai[i];
468           s[0] = b[i2];
469           s[1] = b[i2 + 1];
470           s[2] = b[i2 + 2];
471           s[3] = b[i2 + 3];
472           s[4] = b[i2 + 4];
473           s[5] = b[i2 + 5];
474           s[6] = b[i2 + 6];
475           while (nz--) {
476             idx   = 7 * (*vi++);
477             xw[0] = x[idx];
478             xw[1] = x[1 + idx];
479             xw[2] = x[2 + idx];
480             xw[3] = x[3 + idx];
481             xw[4] = x[4 + idx];
482             xw[5] = x[5 + idx];
483             xw[6] = x[6 + idx];
484             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
485             v += 49;
486           }
487           t[i2]     = s[0];
488           t[i2 + 1] = s[1];
489           t[i2 + 2] = s[2];
490           t[i2 + 3] = s[3];
491           t[i2 + 4] = s[4];
492           t[i2 + 5] = s[5];
493           t[i2 + 6] = s[6];
494           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
495           x[i2]     = xw[0];
496           x[i2 + 1] = xw[1];
497           x[i2 + 2] = xw[2];
498           x[i2 + 3] = xw[3];
499           x[i2 + 4] = xw[4];
500           x[i2 + 5] = xw[5];
501           x[i2 + 6] = xw[6];
502           idiag += 49;
503           i2 += 7;
504         }
505         break;
506       default:
507         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
508         PetscCall(PetscArraycpy(t, b, bs));
509         i2 = bs;
510         idiag += bs2;
511         for (i = 1; i < m; i++) {
512           v  = aa + bs2 * ai[i];
513           vi = aj + ai[i];
514           nz = diag[i] - ai[i];
515 
516           PetscCall(PetscArraycpy(w, b + i2, bs));
517           /* copy all rows of x that are needed into contiguous space */
518           workt = work;
519           for (j = 0; j < nz; j++) {
520             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
521             workt += bs;
522           }
523           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
524           PetscCall(PetscArraycpy(t + i2, w, bs));
525           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
526 
527           idiag += bs2;
528           i2 += bs;
529         }
530         break;
531       }
532       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
533       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
534       xb = t;
535     } else xb = b;
536     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
537       idiag = a->idiag + bs2 * (a->mbs - 1);
538       i2    = bs * (m - 1);
539       switch (bs) {
540       case 1:
541         s[0] = xb[i2];
542         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
543         x[i2] = xw[0];
544         i2 -= 1;
545         for (i = m - 2; i >= 0; i--) {
546           v    = aa + (diag[i] + 1);
547           vi   = aj + diag[i] + 1;
548           nz   = ai[i + 1] - diag[i] - 1;
549           s[0] = xb[i2];
550           for (j = 0; j < nz; j++) {
551             xw[0] = x[vi[j]];
552             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
553           }
554           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
555           x[i2] = xw[0];
556           idiag -= 1;
557           i2 -= 1;
558         }
559         break;
560       case 2:
561         s[0] = xb[i2];
562         s[1] = xb[i2 + 1];
563         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
564         x[i2]     = xw[0];
565         x[i2 + 1] = xw[1];
566         i2 -= 2;
567         idiag -= 4;
568         for (i = m - 2; i >= 0; i--) {
569           v    = aa + 4 * (diag[i] + 1);
570           vi   = aj + diag[i] + 1;
571           nz   = ai[i + 1] - diag[i] - 1;
572           s[0] = xb[i2];
573           s[1] = xb[i2 + 1];
574           for (j = 0; j < nz; j++) {
575             idx   = 2 * vi[j];
576             it    = 4 * j;
577             xw[0] = x[idx];
578             xw[1] = x[1 + idx];
579             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
580           }
581           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
582           x[i2]     = xw[0];
583           x[i2 + 1] = xw[1];
584           idiag -= 4;
585           i2 -= 2;
586         }
587         break;
588       case 3:
589         s[0] = xb[i2];
590         s[1] = xb[i2 + 1];
591         s[2] = xb[i2 + 2];
592         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
593         x[i2]     = xw[0];
594         x[i2 + 1] = xw[1];
595         x[i2 + 2] = xw[2];
596         i2 -= 3;
597         idiag -= 9;
598         for (i = m - 2; i >= 0; i--) {
599           v    = aa + 9 * (diag[i] + 1);
600           vi   = aj + diag[i] + 1;
601           nz   = ai[i + 1] - diag[i] - 1;
602           s[0] = xb[i2];
603           s[1] = xb[i2 + 1];
604           s[2] = xb[i2 + 2];
605           while (nz--) {
606             idx   = 3 * (*vi++);
607             xw[0] = x[idx];
608             xw[1] = x[1 + idx];
609             xw[2] = x[2 + idx];
610             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
611             v += 9;
612           }
613           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
614           x[i2]     = xw[0];
615           x[i2 + 1] = xw[1];
616           x[i2 + 2] = xw[2];
617           idiag -= 9;
618           i2 -= 3;
619         }
620         break;
621       case 4:
622         s[0] = xb[i2];
623         s[1] = xb[i2 + 1];
624         s[2] = xb[i2 + 2];
625         s[3] = xb[i2 + 3];
626         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
627         x[i2]     = xw[0];
628         x[i2 + 1] = xw[1];
629         x[i2 + 2] = xw[2];
630         x[i2 + 3] = xw[3];
631         i2 -= 4;
632         idiag -= 16;
633         for (i = m - 2; i >= 0; i--) {
634           v    = aa + 16 * (diag[i] + 1);
635           vi   = aj + diag[i] + 1;
636           nz   = ai[i + 1] - diag[i] - 1;
637           s[0] = xb[i2];
638           s[1] = xb[i2 + 1];
639           s[2] = xb[i2 + 2];
640           s[3] = xb[i2 + 3];
641           while (nz--) {
642             idx   = 4 * (*vi++);
643             xw[0] = x[idx];
644             xw[1] = x[1 + idx];
645             xw[2] = x[2 + idx];
646             xw[3] = x[3 + idx];
647             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
648             v += 16;
649           }
650           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
651           x[i2]     = xw[0];
652           x[i2 + 1] = xw[1];
653           x[i2 + 2] = xw[2];
654           x[i2 + 3] = xw[3];
655           idiag -= 16;
656           i2 -= 4;
657         }
658         break;
659       case 5:
660         s[0] = xb[i2];
661         s[1] = xb[i2 + 1];
662         s[2] = xb[i2 + 2];
663         s[3] = xb[i2 + 3];
664         s[4] = xb[i2 + 4];
665         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
666         x[i2]     = xw[0];
667         x[i2 + 1] = xw[1];
668         x[i2 + 2] = xw[2];
669         x[i2 + 3] = xw[3];
670         x[i2 + 4] = xw[4];
671         i2 -= 5;
672         idiag -= 25;
673         for (i = m - 2; i >= 0; i--) {
674           v    = aa + 25 * (diag[i] + 1);
675           vi   = aj + diag[i] + 1;
676           nz   = ai[i + 1] - diag[i] - 1;
677           s[0] = xb[i2];
678           s[1] = xb[i2 + 1];
679           s[2] = xb[i2 + 2];
680           s[3] = xb[i2 + 3];
681           s[4] = xb[i2 + 4];
682           while (nz--) {
683             idx   = 5 * (*vi++);
684             xw[0] = x[idx];
685             xw[1] = x[1 + idx];
686             xw[2] = x[2 + idx];
687             xw[3] = x[3 + idx];
688             xw[4] = x[4 + idx];
689             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
690             v += 25;
691           }
692           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
693           x[i2]     = xw[0];
694           x[i2 + 1] = xw[1];
695           x[i2 + 2] = xw[2];
696           x[i2 + 3] = xw[3];
697           x[i2 + 4] = xw[4];
698           idiag -= 25;
699           i2 -= 5;
700         }
701         break;
702       case 6:
703         s[0] = xb[i2];
704         s[1] = xb[i2 + 1];
705         s[2] = xb[i2 + 2];
706         s[3] = xb[i2 + 3];
707         s[4] = xb[i2 + 4];
708         s[5] = xb[i2 + 5];
709         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
710         x[i2]     = xw[0];
711         x[i2 + 1] = xw[1];
712         x[i2 + 2] = xw[2];
713         x[i2 + 3] = xw[3];
714         x[i2 + 4] = xw[4];
715         x[i2 + 5] = xw[5];
716         i2 -= 6;
717         idiag -= 36;
718         for (i = m - 2; i >= 0; i--) {
719           v    = aa + 36 * (diag[i] + 1);
720           vi   = aj + diag[i] + 1;
721           nz   = ai[i + 1] - diag[i] - 1;
722           s[0] = xb[i2];
723           s[1] = xb[i2 + 1];
724           s[2] = xb[i2 + 2];
725           s[3] = xb[i2 + 3];
726           s[4] = xb[i2 + 4];
727           s[5] = xb[i2 + 5];
728           while (nz--) {
729             idx   = 6 * (*vi++);
730             xw[0] = x[idx];
731             xw[1] = x[1 + idx];
732             xw[2] = x[2 + idx];
733             xw[3] = x[3 + idx];
734             xw[4] = x[4 + idx];
735             xw[5] = x[5 + idx];
736             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
737             v += 36;
738           }
739           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
740           x[i2]     = xw[0];
741           x[i2 + 1] = xw[1];
742           x[i2 + 2] = xw[2];
743           x[i2 + 3] = xw[3];
744           x[i2 + 4] = xw[4];
745           x[i2 + 5] = xw[5];
746           idiag -= 36;
747           i2 -= 6;
748         }
749         break;
750       case 7:
751         s[0] = xb[i2];
752         s[1] = xb[i2 + 1];
753         s[2] = xb[i2 + 2];
754         s[3] = xb[i2 + 3];
755         s[4] = xb[i2 + 4];
756         s[5] = xb[i2 + 5];
757         s[6] = xb[i2 + 6];
758         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
759         x[i2]     = xw[0];
760         x[i2 + 1] = xw[1];
761         x[i2 + 2] = xw[2];
762         x[i2 + 3] = xw[3];
763         x[i2 + 4] = xw[4];
764         x[i2 + 5] = xw[5];
765         x[i2 + 6] = xw[6];
766         i2 -= 7;
767         idiag -= 49;
768         for (i = m - 2; i >= 0; i--) {
769           v    = aa + 49 * (diag[i] + 1);
770           vi   = aj + diag[i] + 1;
771           nz   = ai[i + 1] - diag[i] - 1;
772           s[0] = xb[i2];
773           s[1] = xb[i2 + 1];
774           s[2] = xb[i2 + 2];
775           s[3] = xb[i2 + 3];
776           s[4] = xb[i2 + 4];
777           s[5] = xb[i2 + 5];
778           s[6] = xb[i2 + 6];
779           while (nz--) {
780             idx   = 7 * (*vi++);
781             xw[0] = x[idx];
782             xw[1] = x[1 + idx];
783             xw[2] = x[2 + idx];
784             xw[3] = x[3 + idx];
785             xw[4] = x[4 + idx];
786             xw[5] = x[5 + idx];
787             xw[6] = x[6 + idx];
788             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
789             v += 49;
790           }
791           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
792           x[i2]     = xw[0];
793           x[i2 + 1] = xw[1];
794           x[i2 + 2] = xw[2];
795           x[i2 + 3] = xw[3];
796           x[i2 + 4] = xw[4];
797           x[i2 + 5] = xw[5];
798           x[i2 + 6] = xw[6];
799           idiag -= 49;
800           i2 -= 7;
801         }
802         break;
803       default:
804         PetscCall(PetscArraycpy(w, xb + i2, bs));
805         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
806         i2 -= bs;
807         idiag -= bs2;
808         for (i = m - 2; i >= 0; i--) {
809           v  = aa + bs2 * (diag[i] + 1);
810           vi = aj + diag[i] + 1;
811           nz = ai[i + 1] - diag[i] - 1;
812 
813           PetscCall(PetscArraycpy(w, xb + i2, bs));
814           /* copy all rows of x that are needed into contiguous space */
815           workt = work;
816           for (j = 0; j < nz; j++) {
817             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
818             workt += bs;
819           }
820           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
821           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
822 
823           idiag -= bs2;
824           i2 -= bs;
825         }
826         break;
827       }
828       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
829     }
830     its--;
831   }
832   while (its--) {
833     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
834       idiag = a->idiag;
835       i2    = 0;
836       switch (bs) {
837       case 1:
838         for (i = 0; i < m; i++) {
839           v    = aa + ai[i];
840           vi   = aj + ai[i];
841           nz   = ai[i + 1] - ai[i];
842           s[0] = b[i2];
843           for (j = 0; j < nz; j++) {
844             xw[0] = x[vi[j]];
845             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
846           }
847           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
848           x[i2] += xw[0];
849           idiag += 1;
850           i2 += 1;
851         }
852         break;
853       case 2:
854         for (i = 0; i < m; i++) {
855           v    = aa + 4 * ai[i];
856           vi   = aj + ai[i];
857           nz   = ai[i + 1] - ai[i];
858           s[0] = b[i2];
859           s[1] = b[i2 + 1];
860           for (j = 0; j < nz; j++) {
861             idx   = 2 * vi[j];
862             it    = 4 * j;
863             xw[0] = x[idx];
864             xw[1] = x[1 + idx];
865             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
866           }
867           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
868           x[i2] += xw[0];
869           x[i2 + 1] += xw[1];
870           idiag += 4;
871           i2 += 2;
872         }
873         break;
874       case 3:
875         for (i = 0; i < m; i++) {
876           v    = aa + 9 * ai[i];
877           vi   = aj + ai[i];
878           nz   = ai[i + 1] - ai[i];
879           s[0] = b[i2];
880           s[1] = b[i2 + 1];
881           s[2] = b[i2 + 2];
882           while (nz--) {
883             idx   = 3 * (*vi++);
884             xw[0] = x[idx];
885             xw[1] = x[1 + idx];
886             xw[2] = x[2 + idx];
887             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
888             v += 9;
889           }
890           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
891           x[i2] += xw[0];
892           x[i2 + 1] += xw[1];
893           x[i2 + 2] += xw[2];
894           idiag += 9;
895           i2 += 3;
896         }
897         break;
898       case 4:
899         for (i = 0; i < m; i++) {
900           v    = aa + 16 * ai[i];
901           vi   = aj + ai[i];
902           nz   = ai[i + 1] - ai[i];
903           s[0] = b[i2];
904           s[1] = b[i2 + 1];
905           s[2] = b[i2 + 2];
906           s[3] = b[i2 + 3];
907           while (nz--) {
908             idx   = 4 * (*vi++);
909             xw[0] = x[idx];
910             xw[1] = x[1 + idx];
911             xw[2] = x[2 + idx];
912             xw[3] = x[3 + idx];
913             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
914             v += 16;
915           }
916           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
917           x[i2] += xw[0];
918           x[i2 + 1] += xw[1];
919           x[i2 + 2] += xw[2];
920           x[i2 + 3] += xw[3];
921           idiag += 16;
922           i2 += 4;
923         }
924         break;
925       case 5:
926         for (i = 0; i < m; i++) {
927           v    = aa + 25 * ai[i];
928           vi   = aj + ai[i];
929           nz   = ai[i + 1] - ai[i];
930           s[0] = b[i2];
931           s[1] = b[i2 + 1];
932           s[2] = b[i2 + 2];
933           s[3] = b[i2 + 3];
934           s[4] = b[i2 + 4];
935           while (nz--) {
936             idx   = 5 * (*vi++);
937             xw[0] = x[idx];
938             xw[1] = x[1 + idx];
939             xw[2] = x[2 + idx];
940             xw[3] = x[3 + idx];
941             xw[4] = x[4 + idx];
942             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
943             v += 25;
944           }
945           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
946           x[i2] += xw[0];
947           x[i2 + 1] += xw[1];
948           x[i2 + 2] += xw[2];
949           x[i2 + 3] += xw[3];
950           x[i2 + 4] += xw[4];
951           idiag += 25;
952           i2 += 5;
953         }
954         break;
955       case 6:
956         for (i = 0; i < m; i++) {
957           v    = aa + 36 * ai[i];
958           vi   = aj + ai[i];
959           nz   = ai[i + 1] - ai[i];
960           s[0] = b[i2];
961           s[1] = b[i2 + 1];
962           s[2] = b[i2 + 2];
963           s[3] = b[i2 + 3];
964           s[4] = b[i2 + 4];
965           s[5] = b[i2 + 5];
966           while (nz--) {
967             idx   = 6 * (*vi++);
968             xw[0] = x[idx];
969             xw[1] = x[1 + idx];
970             xw[2] = x[2 + idx];
971             xw[3] = x[3 + idx];
972             xw[4] = x[4 + idx];
973             xw[5] = x[5 + idx];
974             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
975             v += 36;
976           }
977           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
978           x[i2] += xw[0];
979           x[i2 + 1] += xw[1];
980           x[i2 + 2] += xw[2];
981           x[i2 + 3] += xw[3];
982           x[i2 + 4] += xw[4];
983           x[i2 + 5] += xw[5];
984           idiag += 36;
985           i2 += 6;
986         }
987         break;
988       case 7:
989         for (i = 0; i < m; i++) {
990           v    = aa + 49 * ai[i];
991           vi   = aj + ai[i];
992           nz   = ai[i + 1] - ai[i];
993           s[0] = b[i2];
994           s[1] = b[i2 + 1];
995           s[2] = b[i2 + 2];
996           s[3] = b[i2 + 3];
997           s[4] = b[i2 + 4];
998           s[5] = b[i2 + 5];
999           s[6] = b[i2 + 6];
1000           while (nz--) {
1001             idx   = 7 * (*vi++);
1002             xw[0] = x[idx];
1003             xw[1] = x[1 + idx];
1004             xw[2] = x[2 + idx];
1005             xw[3] = x[3 + idx];
1006             xw[4] = x[4 + idx];
1007             xw[5] = x[5 + idx];
1008             xw[6] = x[6 + idx];
1009             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1010             v += 49;
1011           }
1012           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1013           x[i2] += xw[0];
1014           x[i2 + 1] += xw[1];
1015           x[i2 + 2] += xw[2];
1016           x[i2 + 3] += xw[3];
1017           x[i2 + 4] += xw[4];
1018           x[i2 + 5] += xw[5];
1019           x[i2 + 6] += xw[6];
1020           idiag += 49;
1021           i2 += 7;
1022         }
1023         break;
1024       default:
1025         for (i = 0; i < m; i++) {
1026           v  = aa + bs2 * ai[i];
1027           vi = aj + ai[i];
1028           nz = ai[i + 1] - ai[i];
1029 
1030           PetscCall(PetscArraycpy(w, b + i2, bs));
1031           /* copy all rows of x that are needed into contiguous space */
1032           workt = work;
1033           for (j = 0; j < nz; j++) {
1034             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1035             workt += bs;
1036           }
1037           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1038           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1039 
1040           idiag += bs2;
1041           i2 += bs;
1042         }
1043         break;
1044       }
1045       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1046     }
1047     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1048       idiag = a->idiag + bs2 * (a->mbs - 1);
1049       i2    = bs * (m - 1);
1050       switch (bs) {
1051       case 1:
1052         for (i = m - 1; i >= 0; i--) {
1053           v    = aa + ai[i];
1054           vi   = aj + ai[i];
1055           nz   = ai[i + 1] - ai[i];
1056           s[0] = b[i2];
1057           for (j = 0; j < nz; j++) {
1058             xw[0] = x[vi[j]];
1059             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1060           }
1061           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1062           x[i2] += xw[0];
1063           idiag -= 1;
1064           i2 -= 1;
1065         }
1066         break;
1067       case 2:
1068         for (i = m - 1; i >= 0; i--) {
1069           v    = aa + 4 * ai[i];
1070           vi   = aj + ai[i];
1071           nz   = ai[i + 1] - ai[i];
1072           s[0] = b[i2];
1073           s[1] = b[i2 + 1];
1074           for (j = 0; j < nz; j++) {
1075             idx   = 2 * vi[j];
1076             it    = 4 * j;
1077             xw[0] = x[idx];
1078             xw[1] = x[1 + idx];
1079             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1080           }
1081           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
1082           x[i2] += xw[0];
1083           x[i2 + 1] += xw[1];
1084           idiag -= 4;
1085           i2 -= 2;
1086         }
1087         break;
1088       case 3:
1089         for (i = m - 1; i >= 0; i--) {
1090           v    = aa + 9 * ai[i];
1091           vi   = aj + ai[i];
1092           nz   = ai[i + 1] - ai[i];
1093           s[0] = b[i2];
1094           s[1] = b[i2 + 1];
1095           s[2] = b[i2 + 2];
1096           while (nz--) {
1097             idx   = 3 * (*vi++);
1098             xw[0] = x[idx];
1099             xw[1] = x[1 + idx];
1100             xw[2] = x[2 + idx];
1101             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1102             v += 9;
1103           }
1104           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
1105           x[i2] += xw[0];
1106           x[i2 + 1] += xw[1];
1107           x[i2 + 2] += xw[2];
1108           idiag -= 9;
1109           i2 -= 3;
1110         }
1111         break;
1112       case 4:
1113         for (i = m - 1; i >= 0; i--) {
1114           v    = aa + 16 * ai[i];
1115           vi   = aj + ai[i];
1116           nz   = ai[i + 1] - ai[i];
1117           s[0] = b[i2];
1118           s[1] = b[i2 + 1];
1119           s[2] = b[i2 + 2];
1120           s[3] = b[i2 + 3];
1121           while (nz--) {
1122             idx   = 4 * (*vi++);
1123             xw[0] = x[idx];
1124             xw[1] = x[1 + idx];
1125             xw[2] = x[2 + idx];
1126             xw[3] = x[3 + idx];
1127             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1128             v += 16;
1129           }
1130           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
1131           x[i2] += xw[0];
1132           x[i2 + 1] += xw[1];
1133           x[i2 + 2] += xw[2];
1134           x[i2 + 3] += xw[3];
1135           idiag -= 16;
1136           i2 -= 4;
1137         }
1138         break;
1139       case 5:
1140         for (i = m - 1; i >= 0; i--) {
1141           v    = aa + 25 * ai[i];
1142           vi   = aj + ai[i];
1143           nz   = ai[i + 1] - ai[i];
1144           s[0] = b[i2];
1145           s[1] = b[i2 + 1];
1146           s[2] = b[i2 + 2];
1147           s[3] = b[i2 + 3];
1148           s[4] = b[i2 + 4];
1149           while (nz--) {
1150             idx   = 5 * (*vi++);
1151             xw[0] = x[idx];
1152             xw[1] = x[1 + idx];
1153             xw[2] = x[2 + idx];
1154             xw[3] = x[3 + idx];
1155             xw[4] = x[4 + idx];
1156             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1157             v += 25;
1158           }
1159           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
1160           x[i2] += xw[0];
1161           x[i2 + 1] += xw[1];
1162           x[i2 + 2] += xw[2];
1163           x[i2 + 3] += xw[3];
1164           x[i2 + 4] += xw[4];
1165           idiag -= 25;
1166           i2 -= 5;
1167         }
1168         break;
1169       case 6:
1170         for (i = m - 1; i >= 0; i--) {
1171           v    = aa + 36 * ai[i];
1172           vi   = aj + ai[i];
1173           nz   = ai[i + 1] - ai[i];
1174           s[0] = b[i2];
1175           s[1] = b[i2 + 1];
1176           s[2] = b[i2 + 2];
1177           s[3] = b[i2 + 3];
1178           s[4] = b[i2 + 4];
1179           s[5] = b[i2 + 5];
1180           while (nz--) {
1181             idx   = 6 * (*vi++);
1182             xw[0] = x[idx];
1183             xw[1] = x[1 + idx];
1184             xw[2] = x[2 + idx];
1185             xw[3] = x[3 + idx];
1186             xw[4] = x[4 + idx];
1187             xw[5] = x[5 + idx];
1188             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1189             v += 36;
1190           }
1191           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
1192           x[i2] += xw[0];
1193           x[i2 + 1] += xw[1];
1194           x[i2 + 2] += xw[2];
1195           x[i2 + 3] += xw[3];
1196           x[i2 + 4] += xw[4];
1197           x[i2 + 5] += xw[5];
1198           idiag -= 36;
1199           i2 -= 6;
1200         }
1201         break;
1202       case 7:
1203         for (i = m - 1; i >= 0; i--) {
1204           v    = aa + 49 * ai[i];
1205           vi   = aj + ai[i];
1206           nz   = ai[i + 1] - ai[i];
1207           s[0] = b[i2];
1208           s[1] = b[i2 + 1];
1209           s[2] = b[i2 + 2];
1210           s[3] = b[i2 + 3];
1211           s[4] = b[i2 + 4];
1212           s[5] = b[i2 + 5];
1213           s[6] = b[i2 + 6];
1214           while (nz--) {
1215             idx   = 7 * (*vi++);
1216             xw[0] = x[idx];
1217             xw[1] = x[1 + idx];
1218             xw[2] = x[2 + idx];
1219             xw[3] = x[3 + idx];
1220             xw[4] = x[4 + idx];
1221             xw[5] = x[5 + idx];
1222             xw[6] = x[6 + idx];
1223             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1224             v += 49;
1225           }
1226           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1227           x[i2] += xw[0];
1228           x[i2 + 1] += xw[1];
1229           x[i2 + 2] += xw[2];
1230           x[i2 + 3] += xw[3];
1231           x[i2 + 4] += xw[4];
1232           x[i2 + 5] += xw[5];
1233           x[i2 + 6] += xw[6];
1234           idiag -= 49;
1235           i2 -= 7;
1236         }
1237         break;
1238       default:
1239         for (i = m - 1; i >= 0; i--) {
1240           v  = aa + bs2 * ai[i];
1241           vi = aj + ai[i];
1242           nz = ai[i + 1] - ai[i];
1243 
1244           PetscCall(PetscArraycpy(w, b + i2, bs));
1245           /* copy all rows of x that are needed into contiguous space */
1246           workt = work;
1247           for (j = 0; j < nz; j++) {
1248             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1249             workt += bs;
1250           }
1251           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1252           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1253 
1254           idiag -= bs2;
1255           i2 -= bs;
1256         }
1257         break;
1258       }
1259       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1260     }
1261   }
1262   PetscCall(VecRestoreArray(xx, &x));
1263   PetscCall(VecRestoreArrayRead(bb, &b));
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 /*
1268     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1269 */
1270 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1271   #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1272 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1273   #define matsetvaluesblocked4_ matsetvaluesblocked4
1274 #endif
1275 
1276 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1277 {
1278   Mat                A = *AA;
1279   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1280   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1281   PetscInt          *ai = a->i, *ailen = a->ilen;
1282   PetscInt          *aj = a->j, stepval, lastcol = -1;
1283   const PetscScalar *value = v;
1284   MatScalar         *ap, *aa = a->a, *bap;
1285 
1286   PetscFunctionBegin;
1287   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1288   stepval = (n - 1) * 4;
1289   for (k = 0; k < m; k++) { /* loop over added rows */
1290     row  = im[k];
1291     rp   = aj + ai[row];
1292     ap   = aa + 16 * ai[row];
1293     nrow = ailen[row];
1294     low  = 0;
1295     high = nrow;
1296     for (l = 0; l < n; l++) { /* loop over added columns */
1297       col = in[l];
1298       if (col <= lastcol) low = 0;
1299       else high = nrow;
1300       lastcol = col;
1301       value   = v + k * (stepval + 4 + l) * 4;
1302       while (high - low > 7) {
1303         t = (low + high) / 2;
1304         if (rp[t] > col) high = t;
1305         else low = t;
1306       }
1307       for (i = low; i < high; i++) {
1308         if (rp[i] > col) break;
1309         if (rp[i] == col) {
1310           bap = ap + 16 * i;
1311           for (ii = 0; ii < 4; ii++, value += stepval) {
1312             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1313           }
1314           goto noinsert2;
1315         }
1316       }
1317       N = nrow++ - 1;
1318       high++; /* added new column index thus must search to one higher than before */
1319       /* shift up all the later entries in this row */
1320       for (ii = N; ii >= i; ii--) {
1321         rp[ii + 1] = rp[ii];
1322         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1323       }
1324       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1325       rp[i] = col;
1326       bap   = ap + 16 * i;
1327       for (ii = 0; ii < 4; ii++, value += stepval) {
1328         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1329       }
1330     noinsert2:;
1331       low = i;
1332     }
1333     ailen[row] = nrow;
1334   }
1335   PetscFunctionReturnVoid();
1336 }
1337 
1338 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1339   #define matsetvalues4_ MATSETVALUES4
1340 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1341   #define matsetvalues4_ matsetvalues4
1342 #endif
1343 
1344 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1345 {
1346   Mat          A = *AA;
1347   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1348   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1349   PetscInt    *ai = a->i, *ailen = a->ilen;
1350   PetscInt    *aj = a->j, brow, bcol;
1351   PetscInt     ridx, cidx, lastcol = -1;
1352   MatScalar   *ap, value, *aa      = a->a, *bap;
1353 
1354   PetscFunctionBegin;
1355   for (k = 0; k < m; k++) { /* loop over added rows */
1356     row  = im[k];
1357     brow = row / 4;
1358     rp   = aj + ai[brow];
1359     ap   = aa + 16 * ai[brow];
1360     nrow = ailen[brow];
1361     low  = 0;
1362     high = nrow;
1363     for (l = 0; l < n; l++) { /* loop over added columns */
1364       col   = in[l];
1365       bcol  = col / 4;
1366       ridx  = row % 4;
1367       cidx  = col % 4;
1368       value = v[l + k * n];
1369       if (col <= lastcol) low = 0;
1370       else high = nrow;
1371       lastcol = col;
1372       while (high - low > 7) {
1373         t = (low + high) / 2;
1374         if (rp[t] > bcol) high = t;
1375         else low = t;
1376       }
1377       for (i = low; i < high; i++) {
1378         if (rp[i] > bcol) break;
1379         if (rp[i] == bcol) {
1380           bap = ap + 16 * i + 4 * cidx + ridx;
1381           *bap += value;
1382           goto noinsert1;
1383         }
1384       }
1385       N = nrow++ - 1;
1386       high++; /* added new column thus must search to one higher than before */
1387       /* shift up all the later entries in this row */
1388       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
1389       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
1390       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1391       rp[i]                        = bcol;
1392       ap[16 * i + 4 * cidx + ridx] = value;
1393     noinsert1:;
1394       low = i;
1395     }
1396     ailen[brow] = nrow;
1397   }
1398   PetscFunctionReturnVoid();
1399 }
1400 
1401 /*
1402      Checks for missing diagonals
1403 */
1404 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1405 {
1406   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1407   PetscInt    *diag, *ii = a->i, i;
1408 
1409   PetscFunctionBegin;
1410   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
1411   *missing = PETSC_FALSE;
1412   if (A->rmap->n > 0 && !ii) {
1413     *missing = PETSC_TRUE;
1414     if (d) *d = 0;
1415     PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
1416   } else {
1417     PetscInt n;
1418     n    = PetscMin(a->mbs, a->nbs);
1419     diag = a->diag;
1420     for (i = 0; i < n; i++) {
1421       if (diag[i] >= ii[i + 1]) {
1422         *missing = PETSC_TRUE;
1423         if (d) *d = i;
1424         PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i));
1425         break;
1426       }
1427     }
1428   }
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1433 {
1434   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1435   PetscInt     i, j, m = a->mbs;
1436 
1437   PetscFunctionBegin;
1438   if (!a->diag) {
1439     PetscCall(PetscMalloc1(m, &a->diag));
1440     a->free_diag = PETSC_TRUE;
1441   }
1442   for (i = 0; i < m; i++) {
1443     a->diag[i] = a->i[i + 1];
1444     for (j = a->i[i]; j < a->i[i + 1]; j++) {
1445       if (a->j[j] == i) {
1446         a->diag[i] = j;
1447         break;
1448       }
1449     }
1450   }
1451   PetscFunctionReturn(0);
1452 }
1453 
1454 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1455 {
1456   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1457   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
1458   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
1459 
1460   PetscFunctionBegin;
1461   *nn = n;
1462   if (!ia) PetscFunctionReturn(0);
1463   if (symmetric) {
1464     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1465     nz = tia[n];
1466   } else {
1467     tia = a->i;
1468     tja = a->j;
1469   }
1470 
1471   if (!blockcompressed && bs > 1) {
1472     (*nn) *= bs;
1473     /* malloc & create the natural set of indices */
1474     PetscCall(PetscMalloc1((n + 1) * bs, ia));
1475     if (n) {
1476       (*ia)[0] = oshift;
1477       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
1478     }
1479 
1480     for (i = 1; i < n; i++) {
1481       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1482       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
1483     }
1484     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1485 
1486     if (inja) {
1487       PetscCall(PetscMalloc1(nz * bs * bs, ja));
1488       cnt = 0;
1489       for (i = 0; i < n; i++) {
1490         for (j = 0; j < bs; j++) {
1491           for (k = tia[i]; k < tia[i + 1]; k++) {
1492             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
1493           }
1494         }
1495       }
1496     }
1497 
1498     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1499       PetscCall(PetscFree(tia));
1500       PetscCall(PetscFree(tja));
1501     }
1502   } else if (oshift == 1) {
1503     if (symmetric) {
1504       nz = tia[A->rmap->n / bs];
1505       /*  add 1 to i and j indices */
1506       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1507       *ia = tia;
1508       if (ja) {
1509         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1510         *ja = tja;
1511       }
1512     } else {
1513       nz = a->i[A->rmap->n / bs];
1514       /* malloc space and  add 1 to i and j indices */
1515       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1516       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1517       if (ja) {
1518         PetscCall(PetscMalloc1(nz, ja));
1519         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1520       }
1521     }
1522   } else {
1523     *ia = tia;
1524     if (ja) *ja = tja;
1525   }
1526   PetscFunctionReturn(0);
1527 }
1528 
1529 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1530 {
1531   PetscFunctionBegin;
1532   if (!ia) PetscFunctionReturn(0);
1533   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1534     PetscCall(PetscFree(*ia));
1535     if (ja) PetscCall(PetscFree(*ja));
1536   }
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1541 {
1542   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1543 
1544   PetscFunctionBegin;
1545 #if defined(PETSC_USE_LOG)
1546   PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz);
1547 #endif
1548   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
1549   PetscCall(ISDestroy(&a->row));
1550   PetscCall(ISDestroy(&a->col));
1551   if (a->free_diag) PetscCall(PetscFree(a->diag));
1552   PetscCall(PetscFree(a->idiag));
1553   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
1554   PetscCall(PetscFree(a->solve_work));
1555   PetscCall(PetscFree(a->mult_work));
1556   PetscCall(PetscFree(a->sor_workt));
1557   PetscCall(PetscFree(a->sor_work));
1558   PetscCall(ISDestroy(&a->icol));
1559   PetscCall(PetscFree(a->saved_values));
1560   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1561 
1562   PetscCall(MatDestroy(&a->sbaijMat));
1563   PetscCall(MatDestroy(&a->parent));
1564   PetscCall(PetscFree(A->data));
1565 
1566   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
1567   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
1568   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
1569   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
1570   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
1571   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
1572   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
1573   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
1574   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
1575   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
1576   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
1577   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
1578 #if defined(PETSC_HAVE_HYPRE)
1579   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
1580 #endif
1581   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
1582   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1587 {
1588   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1589 
1590   PetscFunctionBegin;
1591   switch (op) {
1592   case MAT_ROW_ORIENTED:
1593     a->roworiented = flg;
1594     break;
1595   case MAT_KEEP_NONZERO_PATTERN:
1596     a->keepnonzeropattern = flg;
1597     break;
1598   case MAT_NEW_NONZERO_LOCATIONS:
1599     a->nonew = (flg ? 0 : 1);
1600     break;
1601   case MAT_NEW_NONZERO_LOCATION_ERR:
1602     a->nonew = (flg ? -1 : 0);
1603     break;
1604   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1605     a->nonew = (flg ? -2 : 0);
1606     break;
1607   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1608     a->nounused = (flg ? -1 : 0);
1609     break;
1610   case MAT_FORCE_DIAGONAL_ENTRIES:
1611   case MAT_IGNORE_OFF_PROC_ENTRIES:
1612   case MAT_USE_HASH_TABLE:
1613   case MAT_SORTED_FULL:
1614     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1615     break;
1616   case MAT_SPD:
1617   case MAT_SYMMETRIC:
1618   case MAT_STRUCTURALLY_SYMMETRIC:
1619   case MAT_HERMITIAN:
1620   case MAT_SYMMETRY_ETERNAL:
1621   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1622   case MAT_SUBMAT_SINGLEIS:
1623   case MAT_STRUCTURE_ONLY:
1624   case MAT_SPD_ETERNAL:
1625     /* if the diagonal matrix is square it inherits some of the properties above */
1626     break;
1627   default:
1628     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1629   }
1630   PetscFunctionReturn(0);
1631 }
1632 
1633 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1634 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1635 {
1636   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
1637   MatScalar   *aa_i;
1638   PetscScalar *v_i;
1639 
1640   PetscFunctionBegin;
1641   bs  = A->rmap->bs;
1642   bs2 = bs * bs;
1643   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1644 
1645   bn  = row / bs; /* Block number */
1646   bp  = row % bs; /* Block Position */
1647   M   = ai[bn + 1] - ai[bn];
1648   *nz = bs * M;
1649 
1650   if (v) {
1651     *v = NULL;
1652     if (*nz) {
1653       PetscCall(PetscMalloc1(*nz, v));
1654       for (i = 0; i < M; i++) { /* for each block in the block row */
1655         v_i  = *v + i * bs;
1656         aa_i = aa + bs2 * (ai[bn] + i);
1657         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
1658       }
1659     }
1660   }
1661 
1662   if (idx) {
1663     *idx = NULL;
1664     if (*nz) {
1665       PetscCall(PetscMalloc1(*nz, idx));
1666       for (i = 0; i < M; i++) { /* for each block in the block row */
1667         idx_i = *idx + i * bs;
1668         itmp  = bs * aj[ai[bn] + i];
1669         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
1670       }
1671     }
1672   }
1673   PetscFunctionReturn(0);
1674 }
1675 
1676 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1677 {
1678   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1679 
1680   PetscFunctionBegin;
1681   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1686 {
1687   PetscFunctionBegin;
1688   if (nz) *nz = 0;
1689   if (idx) PetscCall(PetscFree(*idx));
1690   if (v) PetscCall(PetscFree(*v));
1691   PetscFunctionReturn(0);
1692 }
1693 
1694 PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1695 {
1696   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
1697   Mat          C;
1698   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
1699   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
1700   MatScalar   *ata, *aa = a->a;
1701 
1702   PetscFunctionBegin;
1703   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1704   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1705   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1706     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1707 
1708     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
1709     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
1710     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
1711     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
1712 
1713     at  = (Mat_SeqBAIJ *)C->data;
1714     ati = at->i;
1715     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1716   } else {
1717     C   = *B;
1718     at  = (Mat_SeqBAIJ *)C->data;
1719     ati = at->i;
1720   }
1721 
1722   atj = at->j;
1723   ata = at->a;
1724 
1725   /* Copy ati into atfill so we have locations of the next free space in atj */
1726   PetscCall(PetscArraycpy(atfill, ati, nbs));
1727 
1728   /* Walk through A row-wise and mark nonzero entries of A^T. */
1729   for (i = 0; i < mbs; i++) {
1730     anzj = ai[i + 1] - ai[i];
1731     for (j = 0; j < anzj; j++) {
1732       atj[atfill[*aj]] = i;
1733       for (kr = 0; kr < bs; kr++) {
1734         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
1735       }
1736       atfill[*aj++] += 1;
1737     }
1738   }
1739   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1740   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1741 
1742   /* Clean up temporary space and complete requests. */
1743   PetscCall(PetscFree(atfill));
1744 
1745   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1746     PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1747     *B = C;
1748   } else {
1749     PetscCall(MatHeaderMerge(A, &C));
1750   }
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1755 {
1756   Mat Btrans;
1757 
1758   PetscFunctionBegin;
1759   *f = PETSC_FALSE;
1760   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
1761   PetscCall(MatEqual_SeqBAIJ(B, Btrans, f));
1762   PetscCall(MatDestroy(&Btrans));
1763   PetscFunctionReturn(0);
1764 }
1765 
1766 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1767 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1768 {
1769   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1770   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1771   PetscInt    *rowlens, *colidxs;
1772   PetscScalar *matvals;
1773 
1774   PetscFunctionBegin;
1775   PetscCall(PetscViewerSetUp(viewer));
1776 
1777   M  = mat->rmap->N;
1778   N  = mat->cmap->N;
1779   m  = mat->rmap->n;
1780   bs = mat->rmap->bs;
1781   nz = bs * bs * A->nz;
1782 
1783   /* write matrix header */
1784   header[0] = MAT_FILE_CLASSID;
1785   header[1] = M;
1786   header[2] = N;
1787   header[3] = nz;
1788   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1789 
1790   /* store row lengths */
1791   PetscCall(PetscMalloc1(m, &rowlens));
1792   for (cnt = 0, i = 0; i < A->mbs; i++)
1793     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
1794   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
1795   PetscCall(PetscFree(rowlens));
1796 
1797   /* store column indices  */
1798   PetscCall(PetscMalloc1(nz, &colidxs));
1799   for (cnt = 0, i = 0; i < A->mbs; i++)
1800     for (k = 0; k < bs; k++)
1801       for (j = A->i[i]; j < A->i[i + 1]; j++)
1802         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
1803   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1804   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
1805   PetscCall(PetscFree(colidxs));
1806 
1807   /* store nonzero values */
1808   PetscCall(PetscMalloc1(nz, &matvals));
1809   for (cnt = 0, i = 0; i < A->mbs; i++)
1810     for (k = 0; k < bs; k++)
1811       for (j = A->i[i]; j < A->i[i + 1]; j++)
1812         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
1813   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1814   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
1815   PetscCall(PetscFree(matvals));
1816 
1817   /* write block size option to the viewer's .info file */
1818   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1819   PetscFunctionReturn(0);
1820 }
1821 
1822 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1823 {
1824   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1825   PetscInt     i, bs = A->rmap->bs, k;
1826 
1827   PetscFunctionBegin;
1828   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1829   for (i = 0; i < a->mbs; i++) {
1830     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
1831     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
1832     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1833   }
1834   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1839 {
1840   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1841   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1842   PetscViewerFormat format;
1843 
1844   PetscFunctionBegin;
1845   if (A->structure_only) {
1846     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
1847     PetscFunctionReturn(0);
1848   }
1849 
1850   PetscCall(PetscViewerGetFormat(viewer, &format));
1851   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1852     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1853   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1854     const char *matname;
1855     Mat         aij;
1856     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
1857     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
1858     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
1859     PetscCall(MatView(aij, viewer));
1860     PetscCall(MatDestroy(&aij));
1861   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1862     PetscFunctionReturn(0);
1863   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1864     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1865     for (i = 0; i < a->mbs; i++) {
1866       for (j = 0; j < bs; j++) {
1867         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1868         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1869           for (l = 0; l < bs; l++) {
1870 #if defined(PETSC_USE_COMPLEX)
1871             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1872               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1873             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1874               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1875             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1876               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1877             }
1878 #else
1879             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1880 #endif
1881           }
1882         }
1883         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1884       }
1885     }
1886     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1887   } else {
1888     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1889     for (i = 0; i < a->mbs; i++) {
1890       for (j = 0; j < bs; j++) {
1891         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1892         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1893           for (l = 0; l < bs; l++) {
1894 #if defined(PETSC_USE_COMPLEX)
1895             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
1896               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1897             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
1898               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1899             } else {
1900               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1901             }
1902 #else
1903             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1904 #endif
1905           }
1906         }
1907         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1908       }
1909     }
1910     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1911   }
1912   PetscCall(PetscViewerFlush(viewer));
1913   PetscFunctionReturn(0);
1914 }
1915 
1916 #include <petscdraw.h>
1917 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1918 {
1919   Mat               A = (Mat)Aa;
1920   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1921   PetscInt          row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2;
1922   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1923   MatScalar        *aa;
1924   PetscViewer       viewer;
1925   PetscViewerFormat format;
1926 
1927   PetscFunctionBegin;
1928   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1929   PetscCall(PetscViewerGetFormat(viewer, &format));
1930   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
1931 
1932   /* loop over matrix elements drawing boxes */
1933 
1934   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1935     PetscDrawCollectiveBegin(draw);
1936     /* Blue for negative, Cyan for zero and  Red for positive */
1937     color = PETSC_DRAW_BLUE;
1938     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1939       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1940         y_l = A->rmap->N - row - 1.0;
1941         y_r = y_l + 1.0;
1942         x_l = a->j[j] * bs;
1943         x_r = x_l + 1.0;
1944         aa  = a->a + j * bs2;
1945         for (k = 0; k < bs; k++) {
1946           for (l = 0; l < bs; l++) {
1947             if (PetscRealPart(*aa++) >= 0.) continue;
1948             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1949           }
1950         }
1951       }
1952     }
1953     color = PETSC_DRAW_CYAN;
1954     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1955       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1956         y_l = A->rmap->N - row - 1.0;
1957         y_r = y_l + 1.0;
1958         x_l = a->j[j] * bs;
1959         x_r = x_l + 1.0;
1960         aa  = a->a + j * bs2;
1961         for (k = 0; k < bs; k++) {
1962           for (l = 0; l < bs; l++) {
1963             if (PetscRealPart(*aa++) != 0.) continue;
1964             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1965           }
1966         }
1967       }
1968     }
1969     color = PETSC_DRAW_RED;
1970     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1971       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1972         y_l = A->rmap->N - row - 1.0;
1973         y_r = y_l + 1.0;
1974         x_l = a->j[j] * bs;
1975         x_r = x_l + 1.0;
1976         aa  = a->a + j * bs2;
1977         for (k = 0; k < bs; k++) {
1978           for (l = 0; l < bs; l++) {
1979             if (PetscRealPart(*aa++) <= 0.) continue;
1980             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1981           }
1982         }
1983       }
1984     }
1985     PetscDrawCollectiveEnd(draw);
1986   } else {
1987     /* use contour shading to indicate magnitude of values */
1988     /* first determine max of all nonzero values */
1989     PetscReal minv = 0.0, maxv = 0.0;
1990     PetscDraw popup;
1991 
1992     for (i = 0; i < a->nz * a->bs2; i++) {
1993       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1994     }
1995     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1996     PetscCall(PetscDrawGetPopup(draw, &popup));
1997     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
1998 
1999     PetscDrawCollectiveBegin(draw);
2000     for (i = 0, row = 0; i < mbs; i++, row += bs) {
2001       for (j = a->i[i]; j < a->i[i + 1]; j++) {
2002         y_l = A->rmap->N - row - 1.0;
2003         y_r = y_l + 1.0;
2004         x_l = a->j[j] * bs;
2005         x_r = x_l + 1.0;
2006         aa  = a->a + j * bs2;
2007         for (k = 0; k < bs; k++) {
2008           for (l = 0; l < bs; l++) {
2009             MatScalar v = *aa++;
2010             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
2011             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2012           }
2013         }
2014       }
2015     }
2016     PetscDrawCollectiveEnd(draw);
2017   }
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2022 {
2023   PetscReal xl, yl, xr, yr, w, h;
2024   PetscDraw draw;
2025   PetscBool isnull;
2026 
2027   PetscFunctionBegin;
2028   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
2029   PetscCall(PetscDrawIsNull(draw, &isnull));
2030   if (isnull) PetscFunctionReturn(0);
2031 
2032   xr = A->cmap->n;
2033   yr = A->rmap->N;
2034   h  = yr / 10.0;
2035   w  = xr / 10.0;
2036   xr += w;
2037   yr += h;
2038   xl = -w;
2039   yl = -h;
2040   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
2041   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
2042   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
2043   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
2044   PetscCall(PetscDrawSave(draw));
2045   PetscFunctionReturn(0);
2046 }
2047 
2048 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2049 {
2050   PetscBool iascii, isbinary, isdraw;
2051 
2052   PetscFunctionBegin;
2053   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2054   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2055   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
2056   if (iascii) {
2057     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
2058   } else if (isbinary) {
2059     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
2060   } else if (isdraw) {
2061     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
2062   } else {
2063     Mat B;
2064     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2065     PetscCall(MatView(B, viewer));
2066     PetscCall(MatDestroy(&B));
2067   }
2068   PetscFunctionReturn(0);
2069 }
2070 
2071 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2072 {
2073   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2074   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2075   PetscInt    *ai = a->i, *ailen = a->ilen;
2076   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
2077   MatScalar   *ap, *aa = a->a;
2078 
2079   PetscFunctionBegin;
2080   for (k = 0; k < m; k++) { /* loop over rows */
2081     row  = im[k];
2082     brow = row / bs;
2083     if (row < 0) {
2084       v += n;
2085       continue;
2086     } /* negative row */
2087     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2088     rp   = aj ? aj + ai[brow] : NULL;       /* mustn't add to NULL, that is UB */
2089     ap   = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */
2090     nrow = ailen[brow];
2091     for (l = 0; l < n; l++) { /* loop over columns */
2092       if (in[l] < 0) {
2093         v++;
2094         continue;
2095       } /* negative column */
2096       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
2097       col  = in[l];
2098       bcol = col / bs;
2099       cidx = col % bs;
2100       ridx = row % bs;
2101       high = nrow;
2102       low  = 0; /* assume unsorted */
2103       while (high - low > 5) {
2104         t = (low + high) / 2;
2105         if (rp[t] > bcol) high = t;
2106         else low = t;
2107       }
2108       for (i = low; i < high; i++) {
2109         if (rp[i] > bcol) break;
2110         if (rp[i] == bcol) {
2111           *v++ = ap[bs2 * i + bs * cidx + ridx];
2112           goto finished;
2113         }
2114       }
2115       *v++ = 0.0;
2116     finished:;
2117     }
2118   }
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2123 {
2124   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2125   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2126   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2127   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2128   PetscBool          roworiented = a->roworiented;
2129   const PetscScalar *value       = v;
2130   MatScalar         *ap = NULL, *aa = a->a, *bap;
2131 
2132   PetscFunctionBegin;
2133   if (roworiented) {
2134     stepval = (n - 1) * bs;
2135   } else {
2136     stepval = (m - 1) * bs;
2137   }
2138   for (k = 0; k < m; k++) { /* loop over added rows */
2139     row = im[k];
2140     if (row < 0) continue;
2141     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
2142     rp = aj + ai[row];
2143     if (!A->structure_only) ap = aa + bs2 * ai[row];
2144     rmax = imax[row];
2145     nrow = ailen[row];
2146     low  = 0;
2147     high = nrow;
2148     for (l = 0; l < n; l++) { /* loop over added columns */
2149       if (in[l] < 0) continue;
2150       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
2151       col = in[l];
2152       if (!A->structure_only) {
2153         if (roworiented) {
2154           value = v + (k * (stepval + bs) + l) * bs;
2155         } else {
2156           value = v + (l * (stepval + bs) + k) * bs;
2157         }
2158       }
2159       if (col <= lastcol) low = 0;
2160       else high = nrow;
2161       lastcol = col;
2162       while (high - low > 7) {
2163         t = (low + high) / 2;
2164         if (rp[t] > col) high = t;
2165         else low = t;
2166       }
2167       for (i = low; i < high; i++) {
2168         if (rp[i] > col) break;
2169         if (rp[i] == col) {
2170           if (A->structure_only) goto noinsert2;
2171           bap = ap + bs2 * i;
2172           if (roworiented) {
2173             if (is == ADD_VALUES) {
2174               for (ii = 0; ii < bs; ii++, value += stepval) {
2175                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2176               }
2177             } else {
2178               for (ii = 0; ii < bs; ii++, value += stepval) {
2179                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2180               }
2181             }
2182           } else {
2183             if (is == ADD_VALUES) {
2184               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2185                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
2186                 bap += bs;
2187               }
2188             } else {
2189               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2190                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
2191                 bap += bs;
2192               }
2193             }
2194           }
2195           goto noinsert2;
2196         }
2197       }
2198       if (nonew == 1) goto noinsert2;
2199       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2200       if (A->structure_only) {
2201         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
2202       } else {
2203         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2204       }
2205       N = nrow++ - 1;
2206       high++;
2207       /* shift up all the later entries in this row */
2208       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2209       rp[i] = col;
2210       if (!A->structure_only) {
2211         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2212         bap = ap + bs2 * i;
2213         if (roworiented) {
2214           for (ii = 0; ii < bs; ii++, value += stepval) {
2215             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2216           }
2217         } else {
2218           for (ii = 0; ii < bs; ii++, value += stepval) {
2219             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2220           }
2221         }
2222       }
2223     noinsert2:;
2224       low = i;
2225     }
2226     ailen[row] = nrow;
2227   }
2228   PetscFunctionReturn(0);
2229 }
2230 
2231 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2232 {
2233   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2234   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2235   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2236   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2237   MatScalar   *aa    = a->a, *ap;
2238   PetscReal    ratio = 0.6;
2239 
2240   PetscFunctionBegin;
2241   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2242 
2243   if (m) rmax = ailen[0];
2244   for (i = 1; i < mbs; i++) {
2245     /* move each row back by the amount of empty slots (fshift) before it*/
2246     fshift += imax[i - 1] - ailen[i - 1];
2247     rmax = PetscMax(rmax, ailen[i]);
2248     if (fshift) {
2249       ip = aj + ai[i];
2250       ap = aa + bs2 * ai[i];
2251       N  = ailen[i];
2252       PetscCall(PetscArraymove(ip - fshift, ip, N));
2253       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2254     }
2255     ai[i] = ai[i - 1] + ailen[i - 1];
2256   }
2257   if (mbs) {
2258     fshift += imax[mbs - 1] - ailen[mbs - 1];
2259     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2260   }
2261 
2262   /* reset ilen and imax for each row */
2263   a->nonzerorowcnt = 0;
2264   if (A->structure_only) {
2265     PetscCall(PetscFree2(a->imax, a->ilen));
2266   } else { /* !A->structure_only */
2267     for (i = 0; i < mbs; i++) {
2268       ailen[i] = imax[i] = ai[i + 1] - ai[i];
2269       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2270     }
2271   }
2272   a->nz = ai[mbs];
2273 
2274   /* diagonals may have moved, so kill the diagonal pointers */
2275   a->idiagvalid = PETSC_FALSE;
2276   if (fshift && a->diag) {
2277     PetscCall(PetscFree(a->diag));
2278     a->diag = NULL;
2279   }
2280   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
2281   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
2282   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
2283   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
2284 
2285   A->info.mallocs += a->reallocs;
2286   a->reallocs         = 0;
2287   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2288   a->rmax             = rmax;
2289 
2290   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 /*
2295    This function returns an array of flags which indicate the locations of contiguous
2296    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2297    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2298    Assume: sizes should be long enough to hold all the values.
2299 */
2300 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2301 {
2302   PetscInt  i, j, k, row;
2303   PetscBool flg;
2304 
2305   PetscFunctionBegin;
2306   for (i = 0, j = 0; i < n; j++) {
2307     row = idx[i];
2308     if (row % bs != 0) { /* Not the beginning of a block */
2309       sizes[j] = 1;
2310       i++;
2311     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2312       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2313       i++;
2314     } else { /* Beginning of the block, so check if the complete block exists */
2315       flg = PETSC_TRUE;
2316       for (k = 1; k < bs; k++) {
2317         if (row + k != idx[i + k]) { /* break in the block */
2318           flg = PETSC_FALSE;
2319           break;
2320         }
2321       }
2322       if (flg) { /* No break in the bs */
2323         sizes[j] = bs;
2324         i += bs;
2325       } else {
2326         sizes[j] = 1;
2327         i++;
2328       }
2329     }
2330   }
2331   *bs_max = j;
2332   PetscFunctionReturn(0);
2333 }
2334 
2335 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2336 {
2337   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2338   PetscInt           i, j, k, count, *rows;
2339   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
2340   PetscScalar        zero = 0.0;
2341   MatScalar         *aa;
2342   const PetscScalar *xx;
2343   PetscScalar       *bb;
2344 
2345   PetscFunctionBegin;
2346   /* fix right hand side if needed */
2347   if (x && b) {
2348     PetscCall(VecGetArrayRead(x, &xx));
2349     PetscCall(VecGetArray(b, &bb));
2350     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
2351     PetscCall(VecRestoreArrayRead(x, &xx));
2352     PetscCall(VecRestoreArray(b, &bb));
2353   }
2354 
2355   /* Make a copy of the IS and  sort it */
2356   /* allocate memory for rows,sizes */
2357   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2358 
2359   /* copy IS values to rows, and sort them */
2360   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
2361   PetscCall(PetscSortInt(is_n, rows));
2362 
2363   if (baij->keepnonzeropattern) {
2364     for (i = 0; i < is_n; i++) sizes[i] = 1;
2365     bs_max = is_n;
2366   } else {
2367     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2368     A->nonzerostate++;
2369   }
2370 
2371   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2372     row = rows[j];
2373     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2374     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2375     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
2376     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2377       if (diag != (PetscScalar)0.0) {
2378         if (baij->ilen[row / bs] > 0) {
2379           baij->ilen[row / bs]       = 1;
2380           baij->j[baij->i[row / bs]] = row / bs;
2381 
2382           PetscCall(PetscArrayzero(aa, count * bs));
2383         }
2384         /* Now insert all the diagonal values for this bs */
2385         for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES));
2386       } else { /* (diag == 0.0) */
2387         baij->ilen[row / bs] = 0;
2388       }      /* end (diag == 0.0) */
2389     } else { /* (sizes[i] != bs) */
2390       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2391       for (k = 0; k < count; k++) {
2392         aa[0] = zero;
2393         aa += bs;
2394       }
2395       if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES));
2396     }
2397   }
2398 
2399   PetscCall(PetscFree2(rows, sizes));
2400   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2401   PetscFunctionReturn(0);
2402 }
2403 
2404 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2405 {
2406   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2407   PetscInt           i, j, k, count;
2408   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
2409   PetscScalar        zero = 0.0;
2410   MatScalar         *aa;
2411   const PetscScalar *xx;
2412   PetscScalar       *bb;
2413   PetscBool         *zeroed, vecs = PETSC_FALSE;
2414 
2415   PetscFunctionBegin;
2416   /* fix right hand side if needed */
2417   if (x && b) {
2418     PetscCall(VecGetArrayRead(x, &xx));
2419     PetscCall(VecGetArray(b, &bb));
2420     vecs = PETSC_TRUE;
2421   }
2422 
2423   /* zero the columns */
2424   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
2425   for (i = 0; i < is_n; i++) {
2426     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
2427     zeroed[is_idx[i]] = PETSC_TRUE;
2428   }
2429   for (i = 0; i < A->rmap->N; i++) {
2430     if (!zeroed[i]) {
2431       row = i / bs;
2432       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
2433         for (k = 0; k < bs; k++) {
2434           col = bs * baij->j[j] + k;
2435           if (zeroed[col]) {
2436             aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
2437             if (vecs) bb[i] -= aa[0] * xx[col];
2438             aa[0] = 0.0;
2439           }
2440         }
2441       }
2442     } else if (vecs) bb[i] = diag * xx[i];
2443   }
2444   PetscCall(PetscFree(zeroed));
2445   if (vecs) {
2446     PetscCall(VecRestoreArrayRead(x, &xx));
2447     PetscCall(VecRestoreArray(b, &bb));
2448   }
2449 
2450   /* zero the rows */
2451   for (i = 0; i < is_n; i++) {
2452     row   = is_idx[i];
2453     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2454     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
2455     for (k = 0; k < count; k++) {
2456       aa[0] = zero;
2457       aa += bs;
2458     }
2459     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
2460   }
2461   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2466 {
2467   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2468   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2469   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2470   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2471   PetscInt     ridx, cidx, bs2                 = a->bs2;
2472   PetscBool    roworiented = a->roworiented;
2473   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
2474 
2475   PetscFunctionBegin;
2476   for (k = 0; k < m; k++) { /* loop over added rows */
2477     row  = im[k];
2478     brow = row / bs;
2479     if (row < 0) continue;
2480     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
2481     rp = aj + ai[brow];
2482     if (!A->structure_only) ap = aa + bs2 * ai[brow];
2483     rmax = imax[brow];
2484     nrow = ailen[brow];
2485     low  = 0;
2486     high = nrow;
2487     for (l = 0; l < n; l++) { /* loop over added columns */
2488       if (in[l] < 0) continue;
2489       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
2490       col  = in[l];
2491       bcol = col / bs;
2492       ridx = row % bs;
2493       cidx = col % bs;
2494       if (!A->structure_only) {
2495         if (roworiented) {
2496           value = v[l + k * n];
2497         } else {
2498           value = v[k + l * m];
2499         }
2500       }
2501       if (col <= lastcol) low = 0;
2502       else high = nrow;
2503       lastcol = col;
2504       while (high - low > 7) {
2505         t = (low + high) / 2;
2506         if (rp[t] > bcol) high = t;
2507         else low = t;
2508       }
2509       for (i = low; i < high; i++) {
2510         if (rp[i] > bcol) break;
2511         if (rp[i] == bcol) {
2512           bap = ap + bs2 * i + bs * cidx + ridx;
2513           if (!A->structure_only) {
2514             if (is == ADD_VALUES) *bap += value;
2515             else *bap = value;
2516           }
2517           goto noinsert1;
2518         }
2519       }
2520       if (nonew == 1) goto noinsert1;
2521       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2522       if (A->structure_only) {
2523         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2524       } else {
2525         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2526       }
2527       N = nrow++ - 1;
2528       high++;
2529       /* shift up all the later entries in this row */
2530       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2531       rp[i] = bcol;
2532       if (!A->structure_only) {
2533         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2534         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2535         ap[bs2 * i + bs * cidx + ridx] = value;
2536       }
2537       a->nz++;
2538       A->nonzerostate++;
2539     noinsert1:;
2540       low = i;
2541     }
2542     ailen[brow] = nrow;
2543   }
2544   PetscFunctionReturn(0);
2545 }
2546 
2547 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2548 {
2549   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
2550   Mat          outA;
2551   PetscBool    row_identity, col_identity;
2552 
2553   PetscFunctionBegin;
2554   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
2555   PetscCall(ISIdentity(row, &row_identity));
2556   PetscCall(ISIdentity(col, &col_identity));
2557   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
2558 
2559   outA            = inA;
2560   inA->factortype = MAT_FACTOR_LU;
2561   PetscCall(PetscFree(inA->solvertype));
2562   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
2563 
2564   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2565 
2566   PetscCall(PetscObjectReference((PetscObject)row));
2567   PetscCall(ISDestroy(&a->row));
2568   a->row = row;
2569   PetscCall(PetscObjectReference((PetscObject)col));
2570   PetscCall(ISDestroy(&a->col));
2571   a->col = col;
2572 
2573   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2574   PetscCall(ISDestroy(&a->icol));
2575   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2576 
2577   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2578   if (!a->solve_work) { PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); }
2579   PetscCall(MatLUFactorNumeric(outA, inA, info));
2580   PetscFunctionReturn(0);
2581 }
2582 
2583 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, PetscInt *indices)
2584 {
2585   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2586   PetscInt     i, nz, mbs;
2587 
2588   PetscFunctionBegin;
2589   nz  = baij->maxnz;
2590   mbs = baij->mbs;
2591   for (i = 0; i < nz; i++) baij->j[i] = indices[i];
2592   baij->nz = nz;
2593   for (i = 0; i < mbs; i++) baij->ilen[i] = baij->imax[i];
2594   PetscFunctionReturn(0);
2595 }
2596 
2597 /*@
2598     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix.
2599 
2600   Input Parameters:
2601 +  mat - the `MATSEQBAIJ` matrix
2602 -  indices - the column indices
2603 
2604   Level: advanced
2605 
2606   Notes:
2607     This can be called if you have precomputed the nonzero structure of the
2608   matrix and want to provide it to the matrix object to improve the performance
2609   of the `MatSetValues()` operation.
2610 
2611     You MUST have set the correct numbers of nonzeros per row in the call to
2612   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
2613 
2614     MUST be called before any calls to `MatSetValues()`
2615 
2616 .seealso: `MATSEQBAIJ`, `MatSetValues()`
2617 @*/
2618 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2619 {
2620   PetscFunctionBegin;
2621   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2622   PetscValidIntPointer(indices, 2);
2623   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices));
2624   PetscFunctionReturn(0);
2625 }
2626 
2627 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2628 {
2629   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2630   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2631   PetscReal    atmp;
2632   PetscScalar *x, zero = 0.0;
2633   MatScalar   *aa;
2634   PetscInt     ncols, brow, krow, kcol;
2635 
2636   PetscFunctionBegin;
2637   /* why is this not a macro???????????????????????????????????????????????????????????????? */
2638   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2639   bs  = A->rmap->bs;
2640   aa  = a->a;
2641   ai  = a->i;
2642   aj  = a->j;
2643   mbs = a->mbs;
2644 
2645   PetscCall(VecSet(v, zero));
2646   PetscCall(VecGetArray(v, &x));
2647   PetscCall(VecGetLocalSize(v, &n));
2648   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2649   for (i = 0; i < mbs; i++) {
2650     ncols = ai[1] - ai[0];
2651     ai++;
2652     brow = bs * i;
2653     for (j = 0; j < ncols; j++) {
2654       for (kcol = 0; kcol < bs; kcol++) {
2655         for (krow = 0; krow < bs; krow++) {
2656           atmp = PetscAbsScalar(*aa);
2657           aa++;
2658           row = brow + krow; /* row index */
2659           if (PetscAbsScalar(x[row]) < atmp) {
2660             x[row] = atmp;
2661             if (idx) idx[row] = bs * (*aj) + kcol;
2662           }
2663         }
2664       }
2665       aj++;
2666     }
2667   }
2668   PetscCall(VecRestoreArray(v, &x));
2669   PetscFunctionReturn(0);
2670 }
2671 
2672 PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2673 {
2674   PetscFunctionBegin;
2675   /* If the two matrices have the same copy implementation, use fast copy. */
2676   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2677     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
2678     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2679     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
2680 
2681     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
2682     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
2683     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
2684     PetscCall(PetscObjectStateIncrease((PetscObject)B));
2685   } else {
2686     PetscCall(MatCopy_Basic(A, B, str));
2687   }
2688   PetscFunctionReturn(0);
2689 }
2690 
2691 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2692 {
2693   PetscFunctionBegin;
2694   PetscCall(MatSeqBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL));
2695   PetscFunctionReturn(0);
2696 }
2697 
2698 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2699 {
2700   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2701 
2702   PetscFunctionBegin;
2703   *array = a->a;
2704   PetscFunctionReturn(0);
2705 }
2706 
2707 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2708 {
2709   PetscFunctionBegin;
2710   *array = NULL;
2711   PetscFunctionReturn(0);
2712 }
2713 
2714 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2715 {
2716   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
2717   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
2718   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
2719 
2720   PetscFunctionBegin;
2721   /* Set the number of nonzeros in the new matrix */
2722   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
2723   PetscFunctionReturn(0);
2724 }
2725 
2726 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2727 {
2728   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
2729   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2730   PetscBLASInt one = 1;
2731 
2732   PetscFunctionBegin;
2733   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2734     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2735     if (e) {
2736       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2737       if (e) {
2738         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2739         if (e) str = SAME_NONZERO_PATTERN;
2740       }
2741     }
2742     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2743   }
2744   if (str == SAME_NONZERO_PATTERN) {
2745     PetscScalar  alpha = a;
2746     PetscBLASInt bnz;
2747     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2748     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
2749     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2750   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2751     PetscCall(MatAXPY_Basic(Y, a, X, str));
2752   } else {
2753     Mat       B;
2754     PetscInt *nnz;
2755     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
2756     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
2757     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2758     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2759     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
2760     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
2761     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
2762     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
2763     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
2764     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2765     PetscCall(MatHeaderMerge(Y, &B));
2766     PetscCall(PetscFree(nnz));
2767   }
2768   PetscFunctionReturn(0);
2769 }
2770 
2771 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2772 {
2773 #if defined(PETSC_USE_COMPLEX)
2774   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2775   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2776   MatScalar   *aa = a->a;
2777 
2778   PetscFunctionBegin;
2779   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
2780 #else
2781   PetscFunctionBegin;
2782 #endif
2783   PetscFunctionReturn(0);
2784 }
2785 
2786 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2787 {
2788   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2789   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2790   MatScalar   *aa = a->a;
2791 
2792   PetscFunctionBegin;
2793   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
2794   PetscFunctionReturn(0);
2795 }
2796 
2797 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2798 {
2799   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2800   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2801   MatScalar   *aa = a->a;
2802 
2803   PetscFunctionBegin;
2804   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2805   PetscFunctionReturn(0);
2806 }
2807 
2808 /*
2809     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2810 */
2811 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2812 {
2813   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
2814   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
2815   PetscInt     nz = a->i[m], row, *jj, mr, col;
2816 
2817   PetscFunctionBegin;
2818   *nn = n;
2819   if (!ia) PetscFunctionReturn(0);
2820   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
2821   PetscCall(PetscCalloc1(n, &collengths));
2822   PetscCall(PetscMalloc1(n + 1, &cia));
2823   PetscCall(PetscMalloc1(nz, &cja));
2824   jj = a->j;
2825   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2826   cia[0] = oshift;
2827   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2828   PetscCall(PetscArrayzero(collengths, n));
2829   jj = a->j;
2830   for (row = 0; row < m; row++) {
2831     mr = a->i[row + 1] - a->i[row];
2832     for (i = 0; i < mr; i++) {
2833       col = *jj++;
2834 
2835       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2836     }
2837   }
2838   PetscCall(PetscFree(collengths));
2839   *ia = cia;
2840   *ja = cja;
2841   PetscFunctionReturn(0);
2842 }
2843 
2844 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2845 {
2846   PetscFunctionBegin;
2847   if (!ia) PetscFunctionReturn(0);
2848   PetscCall(PetscFree(*ia));
2849   PetscCall(PetscFree(*ja));
2850   PetscFunctionReturn(0);
2851 }
2852 
2853 /*
2854  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2855  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2856  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2857  */
2858 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2859 {
2860   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2861   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2862   PetscInt     nz = a->i[m], row, *jj, mr, col;
2863   PetscInt    *cspidx;
2864 
2865   PetscFunctionBegin;
2866   *nn = n;
2867   if (!ia) PetscFunctionReturn(0);
2868 
2869   PetscCall(PetscCalloc1(n, &collengths));
2870   PetscCall(PetscMalloc1(n + 1, &cia));
2871   PetscCall(PetscMalloc1(nz, &cja));
2872   PetscCall(PetscMalloc1(nz, &cspidx));
2873   jj = a->j;
2874   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2875   cia[0] = oshift;
2876   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2877   PetscCall(PetscArrayzero(collengths, n));
2878   jj = a->j;
2879   for (row = 0; row < m; row++) {
2880     mr = a->i[row + 1] - a->i[row];
2881     for (i = 0; i < mr; i++) {
2882       col                                         = *jj++;
2883       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2884       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2885     }
2886   }
2887   PetscCall(PetscFree(collengths));
2888   *ia    = cia;
2889   *ja    = cja;
2890   *spidx = cspidx;
2891   PetscFunctionReturn(0);
2892 }
2893 
2894 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2895 {
2896   PetscFunctionBegin;
2897   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
2898   PetscCall(PetscFree(*spidx));
2899   PetscFunctionReturn(0);
2900 }
2901 
2902 PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2903 {
2904   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
2905 
2906   PetscFunctionBegin;
2907   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
2908   PetscCall(MatShift_Basic(Y, a));
2909   PetscFunctionReturn(0);
2910 }
2911 
2912 /* -------------------------------------------------------------------*/
2913 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2914                                        MatGetRow_SeqBAIJ,
2915                                        MatRestoreRow_SeqBAIJ,
2916                                        MatMult_SeqBAIJ_N,
2917                                        /* 4*/ MatMultAdd_SeqBAIJ_N,
2918                                        MatMultTranspose_SeqBAIJ,
2919                                        MatMultTransposeAdd_SeqBAIJ,
2920                                        NULL,
2921                                        NULL,
2922                                        NULL,
2923                                        /* 10*/ NULL,
2924                                        MatLUFactor_SeqBAIJ,
2925                                        NULL,
2926                                        NULL,
2927                                        MatTranspose_SeqBAIJ,
2928                                        /* 15*/ MatGetInfo_SeqBAIJ,
2929                                        MatEqual_SeqBAIJ,
2930                                        MatGetDiagonal_SeqBAIJ,
2931                                        MatDiagonalScale_SeqBAIJ,
2932                                        MatNorm_SeqBAIJ,
2933                                        /* 20*/ NULL,
2934                                        MatAssemblyEnd_SeqBAIJ,
2935                                        MatSetOption_SeqBAIJ,
2936                                        MatZeroEntries_SeqBAIJ,
2937                                        /* 24*/ MatZeroRows_SeqBAIJ,
2938                                        NULL,
2939                                        NULL,
2940                                        NULL,
2941                                        NULL,
2942                                        /* 29*/ MatSetUp_SeqBAIJ,
2943                                        NULL,
2944                                        NULL,
2945                                        NULL,
2946                                        NULL,
2947                                        /* 34*/ MatDuplicate_SeqBAIJ,
2948                                        NULL,
2949                                        NULL,
2950                                        MatILUFactor_SeqBAIJ,
2951                                        NULL,
2952                                        /* 39*/ MatAXPY_SeqBAIJ,
2953                                        MatCreateSubMatrices_SeqBAIJ,
2954                                        MatIncreaseOverlap_SeqBAIJ,
2955                                        MatGetValues_SeqBAIJ,
2956                                        MatCopy_SeqBAIJ,
2957                                        /* 44*/ NULL,
2958                                        MatScale_SeqBAIJ,
2959                                        MatShift_SeqBAIJ,
2960                                        NULL,
2961                                        MatZeroRowsColumns_SeqBAIJ,
2962                                        /* 49*/ NULL,
2963                                        MatGetRowIJ_SeqBAIJ,
2964                                        MatRestoreRowIJ_SeqBAIJ,
2965                                        MatGetColumnIJ_SeqBAIJ,
2966                                        MatRestoreColumnIJ_SeqBAIJ,
2967                                        /* 54*/ MatFDColoringCreate_SeqXAIJ,
2968                                        NULL,
2969                                        NULL,
2970                                        NULL,
2971                                        MatSetValuesBlocked_SeqBAIJ,
2972                                        /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2973                                        MatDestroy_SeqBAIJ,
2974                                        MatView_SeqBAIJ,
2975                                        NULL,
2976                                        NULL,
2977                                        /* 64*/ NULL,
2978                                        NULL,
2979                                        NULL,
2980                                        NULL,
2981                                        NULL,
2982                                        /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2983                                        NULL,
2984                                        MatConvert_Basic,
2985                                        NULL,
2986                                        NULL,
2987                                        /* 74*/ NULL,
2988                                        MatFDColoringApply_BAIJ,
2989                                        NULL,
2990                                        NULL,
2991                                        NULL,
2992                                        /* 79*/ NULL,
2993                                        NULL,
2994                                        NULL,
2995                                        NULL,
2996                                        MatLoad_SeqBAIJ,
2997                                        /* 84*/ NULL,
2998                                        NULL,
2999                                        NULL,
3000                                        NULL,
3001                                        NULL,
3002                                        /* 89*/ NULL,
3003                                        NULL,
3004                                        NULL,
3005                                        NULL,
3006                                        NULL,
3007                                        /* 94*/ NULL,
3008                                        NULL,
3009                                        NULL,
3010                                        NULL,
3011                                        NULL,
3012                                        /* 99*/ NULL,
3013                                        NULL,
3014                                        NULL,
3015                                        MatConjugate_SeqBAIJ,
3016                                        NULL,
3017                                        /*104*/ NULL,
3018                                        MatRealPart_SeqBAIJ,
3019                                        MatImaginaryPart_SeqBAIJ,
3020                                        NULL,
3021                                        NULL,
3022                                        /*109*/ NULL,
3023                                        NULL,
3024                                        NULL,
3025                                        NULL,
3026                                        MatMissingDiagonal_SeqBAIJ,
3027                                        /*114*/ NULL,
3028                                        NULL,
3029                                        NULL,
3030                                        NULL,
3031                                        NULL,
3032                                        /*119*/ NULL,
3033                                        NULL,
3034                                        MatMultHermitianTranspose_SeqBAIJ,
3035                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3036                                        NULL,
3037                                        /*124*/ NULL,
3038                                        MatGetColumnReductions_SeqBAIJ,
3039                                        MatInvertBlockDiagonal_SeqBAIJ,
3040                                        NULL,
3041                                        NULL,
3042                                        /*129*/ NULL,
3043                                        NULL,
3044                                        NULL,
3045                                        NULL,
3046                                        NULL,
3047                                        /*134*/ NULL,
3048                                        NULL,
3049                                        NULL,
3050                                        NULL,
3051                                        NULL,
3052                                        /*139*/ MatSetBlockSizes_Default,
3053                                        NULL,
3054                                        NULL,
3055                                        MatFDColoringSetUp_SeqXAIJ,
3056                                        NULL,
3057                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3058                                        MatDestroySubMatrices_SeqBAIJ,
3059                                        NULL,
3060                                        NULL,
3061                                        NULL,
3062                                        NULL,
3063                                        /*150*/ NULL,
3064                                        NULL};
3065 
3066 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3067 {
3068   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3069   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3070 
3071   PetscFunctionBegin;
3072   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3073 
3074   /* allocate space for values if not already there */
3075   if (!aij->saved_values) { PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); }
3076 
3077   /* copy values over */
3078   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
3079   PetscFunctionReturn(0);
3080 }
3081 
3082 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3083 {
3084   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3085   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3086 
3087   PetscFunctionBegin;
3088   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3089   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
3090 
3091   /* copy values over */
3092   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
3093   PetscFunctionReturn(0);
3094 }
3095 
3096 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3097 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3098 
3099 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz)
3100 {
3101   Mat_SeqBAIJ *b;
3102   PetscInt     i, mbs, nbs, bs2;
3103   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3104 
3105   PetscFunctionBegin;
3106   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3107   if (nz == MAT_SKIP_ALLOCATION) {
3108     skipallocation = PETSC_TRUE;
3109     nz             = 0;
3110   }
3111 
3112   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
3113   PetscCall(PetscLayoutSetUp(B->rmap));
3114   PetscCall(PetscLayoutSetUp(B->cmap));
3115   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3116 
3117   B->preallocated = PETSC_TRUE;
3118 
3119   mbs = B->rmap->n / bs;
3120   nbs = B->cmap->n / bs;
3121   bs2 = bs * bs;
3122 
3123   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3124 
3125   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3126   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3127   if (nnz) {
3128     for (i = 0; i < mbs; i++) {
3129       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
3130       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3131     }
3132   }
3133 
3134   b = (Mat_SeqBAIJ *)B->data;
3135   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
3136   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3137   PetscOptionsEnd();
3138 
3139   if (!flg) {
3140     switch (bs) {
3141     case 1:
3142       B->ops->mult    = MatMult_SeqBAIJ_1;
3143       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3144       break;
3145     case 2:
3146       B->ops->mult    = MatMult_SeqBAIJ_2;
3147       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3148       break;
3149     case 3:
3150       B->ops->mult    = MatMult_SeqBAIJ_3;
3151       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3152       break;
3153     case 4:
3154       B->ops->mult    = MatMult_SeqBAIJ_4;
3155       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3156       break;
3157     case 5:
3158       B->ops->mult    = MatMult_SeqBAIJ_5;
3159       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3160       break;
3161     case 6:
3162       B->ops->mult    = MatMult_SeqBAIJ_6;
3163       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3164       break;
3165     case 7:
3166       B->ops->mult    = MatMult_SeqBAIJ_7;
3167       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3168       break;
3169     case 9: {
3170       PetscInt version = 1;
3171       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3172       switch (version) {
3173 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3174       case 1:
3175         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
3176         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3177         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3178         break;
3179 #endif
3180       default:
3181         B->ops->mult    = MatMult_SeqBAIJ_N;
3182         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3183         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3184         break;
3185       }
3186       break;
3187     }
3188     case 11:
3189       B->ops->mult    = MatMult_SeqBAIJ_11;
3190       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3191       break;
3192     case 12: {
3193       PetscInt version = 1;
3194       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3195       switch (version) {
3196       case 1:
3197         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
3198         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3199         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3200         break;
3201       case 2:
3202         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
3203         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3204         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3205         break;
3206 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3207       case 3:
3208         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
3209         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3210         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3211         break;
3212 #endif
3213       default:
3214         B->ops->mult    = MatMult_SeqBAIJ_N;
3215         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3216         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3217         break;
3218       }
3219       break;
3220     }
3221     case 15: {
3222       PetscInt version = 1;
3223       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3224       switch (version) {
3225       case 1:
3226         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3227         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3228         break;
3229       case 2:
3230         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3231         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3232         break;
3233       case 3:
3234         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3235         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3236         break;
3237       case 4:
3238         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3239         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3240         break;
3241       default:
3242         B->ops->mult = MatMult_SeqBAIJ_N;
3243         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3244         break;
3245       }
3246       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3247       break;
3248     }
3249     default:
3250       B->ops->mult    = MatMult_SeqBAIJ_N;
3251       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3252       PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3253       break;
3254     }
3255   }
3256   B->ops->sor = MatSOR_SeqBAIJ;
3257   b->mbs      = mbs;
3258   b->nbs      = nbs;
3259   if (!skipallocation) {
3260     if (!b->imax) {
3261       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
3262 
3263       b->free_imax_ilen = PETSC_TRUE;
3264     }
3265     /* b->ilen will count nonzeros in each block row so far. */
3266     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3267     if (!nnz) {
3268       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3269       else if (nz < 0) nz = 1;
3270       nz = PetscMin(nz, nbs);
3271       for (i = 0; i < mbs; i++) b->imax[i] = nz;
3272       PetscCall(PetscIntMultError(nz, mbs, &nz));
3273     } else {
3274       PetscInt64 nz64 = 0;
3275       for (i = 0; i < mbs; i++) {
3276         b->imax[i] = nnz[i];
3277         nz64 += nnz[i];
3278       }
3279       PetscCall(PetscIntCast(nz64, &nz));
3280     }
3281 
3282     /* allocate the matrix space */
3283     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3284     if (B->structure_only) {
3285       PetscCall(PetscMalloc1(nz, &b->j));
3286       PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i));
3287     } else {
3288       PetscInt nzbs2 = 0;
3289       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
3290       PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i));
3291       PetscCall(PetscArrayzero(b->a, nz * bs2));
3292     }
3293     PetscCall(PetscArrayzero(b->j, nz));
3294 
3295     if (B->structure_only) {
3296       b->singlemalloc = PETSC_FALSE;
3297       b->free_a       = PETSC_FALSE;
3298     } else {
3299       b->singlemalloc = PETSC_TRUE;
3300       b->free_a       = PETSC_TRUE;
3301     }
3302     b->free_ij = PETSC_TRUE;
3303 
3304     b->i[0] = 0;
3305     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3306 
3307   } else {
3308     b->free_a  = PETSC_FALSE;
3309     b->free_ij = PETSC_FALSE;
3310   }
3311 
3312   b->bs2              = bs2;
3313   b->mbs              = mbs;
3314   b->nz               = 0;
3315   b->maxnz            = nz;
3316   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3317   B->was_assembled    = PETSC_FALSE;
3318   B->assembled        = PETSC_FALSE;
3319   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3320   PetscFunctionReturn(0);
3321 }
3322 
3323 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3324 {
3325   PetscInt     i, m, nz, nz_max = 0, *nnz;
3326   PetscScalar *values      = NULL;
3327   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3328 
3329   PetscFunctionBegin;
3330   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
3331   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
3332   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
3333   PetscCall(PetscLayoutSetUp(B->rmap));
3334   PetscCall(PetscLayoutSetUp(B->cmap));
3335   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3336   m = B->rmap->n / bs;
3337 
3338   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
3339   PetscCall(PetscMalloc1(m + 1, &nnz));
3340   for (i = 0; i < m; i++) {
3341     nz = ii[i + 1] - ii[i];
3342     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3343     nz_max = PetscMax(nz_max, nz);
3344     nnz[i] = nz;
3345   }
3346   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
3347   PetscCall(PetscFree(nnz));
3348 
3349   values = (PetscScalar *)V;
3350   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3351   for (i = 0; i < m; i++) {
3352     PetscInt        ncols = ii[i + 1] - ii[i];
3353     const PetscInt *icols = jj + ii[i];
3354     if (bs == 1 || !roworiented) {
3355       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
3356       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
3357     } else {
3358       PetscInt j;
3359       for (j = 0; j < ncols; j++) {
3360         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
3361         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
3362       }
3363     }
3364   }
3365   if (!V) PetscCall(PetscFree(values));
3366   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3367   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3368   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3369   PetscFunctionReturn(0);
3370 }
3371 
3372 /*@C
3373    MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3374 
3375    Not Collective
3376 
3377    Input Parameter:
3378 .  mat - a `MATSEQBAIJ` matrix
3379 
3380    Output Parameter:
3381 .   array - pointer to the data
3382 
3383    Level: intermediate
3384 
3385 .seealso: `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3386 @*/
3387 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array)
3388 {
3389   PetscFunctionBegin;
3390   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
3391   PetscFunctionReturn(0);
3392 }
3393 
3394 /*@C
3395    MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3396 
3397    Not Collective
3398 
3399    Input Parameters:
3400 +  mat - a `MATSEQBAIJ` matrix
3401 -  array - pointer to the data
3402 
3403    Level: intermediate
3404 
3405 .seealso: `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3406 @*/
3407 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array)
3408 {
3409   PetscFunctionBegin;
3410   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
3411   PetscFunctionReturn(0);
3412 }
3413 
3414 /*MC
3415    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3416    block sparse compressed row format.
3417 
3418    Options Database Keys:
3419 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3420 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3421 
3422    Level: beginner
3423 
3424    Notes:
3425     `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
3426     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
3427 
3428    Run with -info to see what version of the matrix-vector product is being used
3429 
3430 .seealso: `MatCreateSeqBAIJ()`
3431 M*/
3432 
3433 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3434 
3435 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3436 {
3437   PetscMPIInt  size;
3438   Mat_SeqBAIJ *b;
3439 
3440   PetscFunctionBegin;
3441   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
3442   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3443 
3444   PetscCall(PetscNew(&b));
3445   B->data = (void *)b;
3446   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
3447 
3448   b->row          = NULL;
3449   b->col          = NULL;
3450   b->icol         = NULL;
3451   b->reallocs     = 0;
3452   b->saved_values = NULL;
3453 
3454   b->roworiented        = PETSC_TRUE;
3455   b->nonew              = 0;
3456   b->diag               = NULL;
3457   B->spptr              = NULL;
3458   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3459   b->keepnonzeropattern = PETSC_FALSE;
3460 
3461   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
3462   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
3463   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
3464   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
3465   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
3466   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
3467   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
3468   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
3469   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3470   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
3471 #if defined(PETSC_HAVE_HYPRE)
3472   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
3473 #endif
3474   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
3475   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
3476   PetscFunctionReturn(0);
3477 }
3478 
3479 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3480 {
3481   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3482   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3483 
3484   PetscFunctionBegin;
3485   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
3486 
3487   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3488     c->imax           = a->imax;
3489     c->ilen           = a->ilen;
3490     c->free_imax_ilen = PETSC_FALSE;
3491   } else {
3492     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3493     for (i = 0; i < mbs; i++) {
3494       c->imax[i] = a->imax[i];
3495       c->ilen[i] = a->ilen[i];
3496     }
3497     c->free_imax_ilen = PETSC_TRUE;
3498   }
3499 
3500   /* allocate the matrix space */
3501   if (mallocmatspace) {
3502     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3503       PetscCall(PetscCalloc1(bs2 * nz, &c->a));
3504 
3505       c->i            = a->i;
3506       c->j            = a->j;
3507       c->singlemalloc = PETSC_FALSE;
3508       c->free_a       = PETSC_TRUE;
3509       c->free_ij      = PETSC_FALSE;
3510       c->parent       = A;
3511       C->preallocated = PETSC_TRUE;
3512       C->assembled    = PETSC_TRUE;
3513 
3514       PetscCall(PetscObjectReference((PetscObject)A));
3515       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3516       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3517     } else {
3518       PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i));
3519 
3520       c->singlemalloc = PETSC_TRUE;
3521       c->free_a       = PETSC_TRUE;
3522       c->free_ij      = PETSC_TRUE;
3523 
3524       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3525       if (mbs > 0) {
3526         PetscCall(PetscArraycpy(c->j, a->j, nz));
3527         if (cpvalues == MAT_COPY_VALUES) {
3528           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
3529         } else {
3530           PetscCall(PetscArrayzero(c->a, bs2 * nz));
3531         }
3532       }
3533       C->preallocated = PETSC_TRUE;
3534       C->assembled    = PETSC_TRUE;
3535     }
3536   }
3537 
3538   c->roworiented = a->roworiented;
3539   c->nonew       = a->nonew;
3540 
3541   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
3542   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
3543 
3544   c->bs2 = a->bs2;
3545   c->mbs = a->mbs;
3546   c->nbs = a->nbs;
3547 
3548   if (a->diag) {
3549     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3550       c->diag      = a->diag;
3551       c->free_diag = PETSC_FALSE;
3552     } else {
3553       PetscCall(PetscMalloc1(mbs + 1, &c->diag));
3554       for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i];
3555       c->free_diag = PETSC_TRUE;
3556     }
3557   } else c->diag = NULL;
3558 
3559   c->nz         = a->nz;
3560   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3561   c->solve_work = NULL;
3562   c->mult_work  = NULL;
3563   c->sor_workt  = NULL;
3564   c->sor_work   = NULL;
3565 
3566   c->compressedrow.use   = a->compressedrow.use;
3567   c->compressedrow.nrows = a->compressedrow.nrows;
3568   if (a->compressedrow.use) {
3569     i = a->compressedrow.nrows;
3570     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
3571     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
3572     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
3573   } else {
3574     c->compressedrow.use    = PETSC_FALSE;
3575     c->compressedrow.i      = NULL;
3576     c->compressedrow.rindex = NULL;
3577   }
3578   C->nonzerostate = A->nonzerostate;
3579 
3580   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
3581   PetscFunctionReturn(0);
3582 }
3583 
3584 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3585 {
3586   PetscFunctionBegin;
3587   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
3588   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
3589   PetscCall(MatSetType(*B, MATSEQBAIJ));
3590   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
3591   PetscFunctionReturn(0);
3592 }
3593 
3594 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3595 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3596 {
3597   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3598   PetscInt    *rowidxs, *colidxs;
3599   PetscScalar *matvals;
3600 
3601   PetscFunctionBegin;
3602   PetscCall(PetscViewerSetUp(viewer));
3603 
3604   /* read matrix header */
3605   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3606   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3607   M  = header[1];
3608   N  = header[2];
3609   nz = header[3];
3610   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3611   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3612   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3613 
3614   /* set block sizes from the viewer's .info file */
3615   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3616   /* set local and global sizes if not set already */
3617   if (mat->rmap->n < 0) mat->rmap->n = M;
3618   if (mat->cmap->n < 0) mat->cmap->n = N;
3619   if (mat->rmap->N < 0) mat->rmap->N = M;
3620   if (mat->cmap->N < 0) mat->cmap->N = N;
3621   PetscCall(PetscLayoutSetUp(mat->rmap));
3622   PetscCall(PetscLayoutSetUp(mat->cmap));
3623 
3624   /* check if the matrix sizes are correct */
3625   PetscCall(MatGetSize(mat, &rows, &cols));
3626   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3627   PetscCall(MatGetBlockSize(mat, &bs));
3628   PetscCall(MatGetLocalSize(mat, &m, &n));
3629   mbs = m / bs;
3630   nbs = n / bs;
3631 
3632   /* read in row lengths, column indices and nonzero values */
3633   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3634   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
3635   rowidxs[0] = 0;
3636   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3637   sum = rowidxs[m];
3638   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3639 
3640   /* read in column indices and nonzero values */
3641   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
3642   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
3643   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3644 
3645   {               /* preallocate matrix storage */
3646     PetscBT   bt; /* helper bit set to count nonzeros */
3647     PetscInt *nnz;
3648     PetscBool sbaij;
3649 
3650     PetscCall(PetscBTCreate(nbs, &bt));
3651     PetscCall(PetscCalloc1(mbs, &nnz));
3652     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3653     for (i = 0; i < mbs; i++) {
3654       PetscCall(PetscBTMemzero(nbs, bt));
3655       for (k = 0; k < bs; k++) {
3656         PetscInt row = bs * i + k;
3657         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3658           PetscInt col = colidxs[j];
3659           if (!sbaij || col >= row)
3660             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3661         }
3662       }
3663     }
3664     PetscCall(PetscBTDestroy(&bt));
3665     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
3666     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
3667     PetscCall(PetscFree(nnz));
3668   }
3669 
3670   /* store matrix values */
3671   for (i = 0; i < m; i++) {
3672     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
3673     PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
3674   }
3675 
3676   PetscCall(PetscFree(rowidxs));
3677   PetscCall(PetscFree2(colidxs, matvals));
3678   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3679   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3680   PetscFunctionReturn(0);
3681 }
3682 
3683 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3684 {
3685   PetscBool isbinary;
3686 
3687   PetscFunctionBegin;
3688   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3689   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3690   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
3691   PetscFunctionReturn(0);
3692 }
3693 
3694 /*@C
3695    MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3696    compressed row) format.  For good matrix assembly performance the
3697    user should preallocate the matrix storage by setting the parameter nz
3698    (or the array nnz).  By setting these parameters accurately, performance
3699    during matrix assembly can be increased by more than a factor of 50.
3700 
3701    Collective
3702 
3703    Input Parameters:
3704 +  comm - MPI communicator, set to `PETSC_COMM_SELF`
3705 .  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3706           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3707 .  m - number of rows
3708 .  n - number of columns
3709 .  nz - number of nonzero blocks  per block row (same for all rows)
3710 -  nnz - array containing the number of nonzero blocks in the various block rows
3711          (possibly different for each block row) or NULL
3712 
3713    Output Parameter:
3714 .  A - the matrix
3715 
3716    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3717    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3718    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
3719 
3720    Options Database Keys:
3721 +   -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3722 -   -mat_block_size - size of the blocks to use
3723 
3724    Level: intermediate
3725 
3726    Notes:
3727    The number of rows and columns must be divisible by blocksize.
3728 
3729    If the nnz parameter is given then the nz parameter is ignored
3730 
3731    A nonzero block is any block that as 1 or more nonzeros in it
3732 
3733    The `MATSEQBAIJ` format is fully compatible with standard Fortran 77
3734    storage.  That is, the stored row and column indices can begin at
3735    either one (as in Fortran) or zero.  See the users' manual for details.
3736 
3737    Specify the preallocated storage with either nz or nnz (not both).
3738    Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
3739    allocation.  See [Sparse Matrices](sec_matsparse) for details.
3740    matrices.
3741 
3742 .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3743 @*/
3744 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3745 {
3746   PetscFunctionBegin;
3747   PetscCall(MatCreate(comm, A));
3748   PetscCall(MatSetSizes(*A, m, n, m, n));
3749   PetscCall(MatSetType(*A, MATSEQBAIJ));
3750   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
3751   PetscFunctionReturn(0);
3752 }
3753 
3754 /*@C
3755    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3756    per row in the matrix. For good matrix assembly performance the
3757    user should preallocate the matrix storage by setting the parameter nz
3758    (or the array nnz).  By setting these parameters accurately, performance
3759    during matrix assembly can be increased by more than a factor of 50.
3760 
3761    Collective
3762 
3763    Input Parameters:
3764 +  B - the matrix
3765 .  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3766           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3767 .  nz - number of block nonzeros per block row (same for all rows)
3768 -  nnz - array containing the number of block nonzeros in the various block rows
3769          (possibly different for each block row) or NULL
3770 
3771    Options Database Keys:
3772 +   -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3773 -   -mat_block_size - size of the blocks to use
3774 
3775    Level: intermediate
3776 
3777    Notes:
3778    If the nnz parameter is given then the nz parameter is ignored
3779 
3780    You can call `MatGetInfo()` to get information on how effective the preallocation was;
3781    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3782    You can also run with the option -info and look for messages with the string
3783    malloc in them to see if additional memory allocation was needed.
3784 
3785    The `MATSEQBAIJ` format is fully compatible with standard Fortran 77
3786    storage.  That is, the stored row and column indices can begin at
3787    either one (as in Fortran) or zero.  See the users' manual for details.
3788 
3789    Specify the preallocated storage with either nz or nnz (not both).
3790    Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
3791    allocation.  See [Sparse Matrices](sec_matsparse) for details.
3792 
3793 .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3794 @*/
3795 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3796 {
3797   PetscFunctionBegin;
3798   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3799   PetscValidType(B, 1);
3800   PetscValidLogicalCollectiveInt(B, bs, 2);
3801   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
3802   PetscFunctionReturn(0);
3803 }
3804 
3805 /*@C
3806    MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3807 
3808    Collective
3809 
3810    Input Parameters:
3811 +  B - the matrix
3812 .  i - the indices into j for the start of each local row (starts with zero)
3813 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3814 -  v - optional values in the matrix
3815 
3816    Level: advanced
3817 
3818    Notes:
3819    The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
3820    may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
3821    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3822    `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3823    block column and the second index is over columns within a block.
3824 
3825    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3826 
3827 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3828 @*/
3829 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3830 {
3831   PetscFunctionBegin;
3832   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3833   PetscValidType(B, 1);
3834   PetscValidLogicalCollectiveInt(B, bs, 2);
3835   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
3836   PetscFunctionReturn(0);
3837 }
3838 
3839 /*@
3840      MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3841 
3842      Collective
3843 
3844    Input Parameters:
3845 +  comm - must be an MPI communicator of size 1
3846 .  bs - size of block
3847 .  m - number of rows
3848 .  n - number of columns
3849 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3850 .  j - column indices
3851 -  a - matrix values
3852 
3853    Output Parameter:
3854 .  mat - the matrix
3855 
3856    Level: advanced
3857 
3858    Notes:
3859        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3860     once the matrix is destroyed
3861 
3862        You cannot set new nonzero locations into this matrix, that will generate an error.
3863 
3864        The i and j indices are 0 based
3865 
3866        When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3867 
3868       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3869       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3870       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3871       with column-major ordering within blocks.
3872 
3873 .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3874 @*/
3875 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3876 {
3877   PetscInt     ii;
3878   Mat_SeqBAIJ *baij;
3879 
3880   PetscFunctionBegin;
3881   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
3882   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3883 
3884   PetscCall(MatCreate(comm, mat));
3885   PetscCall(MatSetSizes(*mat, m, n, m, n));
3886   PetscCall(MatSetType(*mat, MATSEQBAIJ));
3887   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3888   baij = (Mat_SeqBAIJ *)(*mat)->data;
3889   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3890 
3891   baij->i = i;
3892   baij->j = j;
3893   baij->a = a;
3894 
3895   baij->singlemalloc = PETSC_FALSE;
3896   baij->nonew        = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3897   baij->free_a       = PETSC_FALSE;
3898   baij->free_ij      = PETSC_FALSE;
3899 
3900   for (ii = 0; ii < m; ii++) {
3901     baij->ilen[ii] = baij->imax[ii] = i[ii + 1] - i[ii];
3902     PetscCheck(i[ii + 1] - i[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, i[ii + 1] - i[ii]);
3903   }
3904   if (PetscDefined(USE_DEBUG)) {
3905     for (ii = 0; ii < baij->i[m]; ii++) {
3906       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3907       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3908     }
3909   }
3910 
3911   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3912   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3913   PetscFunctionReturn(0);
3914 }
3915 
3916 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3917 {
3918   PetscFunctionBegin;
3919   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
3920   PetscFunctionReturn(0);
3921 }
3922