xref: /petsc/src/mat/impls/baij/seq/baij.c (revision ae51ba108530e1a8d2aeb8bc3771a24c2fcd801e)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
19 
20 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
21 {
22   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
23   PetscInt     m, n, ib, jb, bs = A->rmap->bs;
24   MatScalar   *a_val = a_aij->a;
25 
26   PetscFunctionBegin;
27   PetscCall(MatGetSize(A, &m, &n));
28   PetscCall(PetscArrayzero(reductions, n));
29   if (type == NORM_2) {
30     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
31       for (jb = 0; jb < bs; jb++) {
32         for (ib = 0; ib < bs; ib++) {
33           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
34           a_val++;
35         }
36       }
37     }
38   } else if (type == NORM_1) {
39     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
40       for (jb = 0; jb < bs; jb++) {
41         for (ib = 0; ib < bs; ib++) {
42           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
43           a_val++;
44         }
45       }
46     }
47   } else if (type == NORM_INFINITY) {
48     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
49       for (jb = 0; jb < bs; jb++) {
50         for (ib = 0; ib < bs; ib++) {
51           int col         = A->cmap->rstart + a_aij->j[i] * bs + jb;
52           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
53           a_val++;
54         }
55       }
56     }
57   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
58     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
59       for (jb = 0; jb < bs; jb++) {
60         for (ib = 0; ib < bs; ib++) {
61           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
62           a_val++;
63         }
64       }
65     }
66   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
67     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
68       for (jb = 0; jb < bs; jb++) {
69         for (ib = 0; ib < bs; ib++) {
70           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
71           a_val++;
72         }
73       }
74     }
75   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
76   if (type == NORM_2) {
77     for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
78   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
79     for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
80   }
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
85 {
86   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
87   PetscInt    *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
88   MatScalar   *v     = a->a, *odiag, *diag, work[25], *v_work;
89   PetscReal    shift = 0.0;
90   PetscBool    allowzeropivot, zeropivotdetected = PETSC_FALSE;
91 
92   PetscFunctionBegin;
93   allowzeropivot = PetscNot(A->erroriffailure);
94 
95   if (a->idiagvalid) {
96     if (values) *values = a->idiag;
97     PetscFunctionReturn(PETSC_SUCCESS);
98   }
99   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
100   diag_offset = a->diag;
101   if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); }
102   diag = a->idiag;
103   if (values) *values = a->idiag;
104   /* factor and invert each block */
105   switch (bs) {
106   case 1:
107     for (i = 0; i < mbs; i++) {
108       odiag   = v + 1 * diag_offset[i];
109       diag[0] = odiag[0];
110 
111       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
112         if (allowzeropivot) {
113           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
114           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
115           A->factorerror_zeropivot_row   = i;
116           PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
117         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
118       }
119 
120       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
121       diag += 1;
122     }
123     break;
124   case 2:
125     for (i = 0; i < mbs; i++) {
126       odiag   = v + 4 * diag_offset[i];
127       diag[0] = odiag[0];
128       diag[1] = odiag[1];
129       diag[2] = odiag[2];
130       diag[3] = odiag[3];
131       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
132       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
133       diag += 4;
134     }
135     break;
136   case 3:
137     for (i = 0; i < mbs; i++) {
138       odiag   = v + 9 * diag_offset[i];
139       diag[0] = odiag[0];
140       diag[1] = odiag[1];
141       diag[2] = odiag[2];
142       diag[3] = odiag[3];
143       diag[4] = odiag[4];
144       diag[5] = odiag[5];
145       diag[6] = odiag[6];
146       diag[7] = odiag[7];
147       diag[8] = odiag[8];
148       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
149       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
150       diag += 9;
151     }
152     break;
153   case 4:
154     for (i = 0; i < mbs; i++) {
155       odiag = v + 16 * diag_offset[i];
156       PetscCall(PetscArraycpy(diag, odiag, 16));
157       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
158       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
159       diag += 16;
160     }
161     break;
162   case 5:
163     for (i = 0; i < mbs; i++) {
164       odiag = v + 25 * diag_offset[i];
165       PetscCall(PetscArraycpy(diag, odiag, 25));
166       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
167       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
168       diag += 25;
169     }
170     break;
171   case 6:
172     for (i = 0; i < mbs; i++) {
173       odiag = v + 36 * diag_offset[i];
174       PetscCall(PetscArraycpy(diag, odiag, 36));
175       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
176       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
177       diag += 36;
178     }
179     break;
180   case 7:
181     for (i = 0; i < mbs; i++) {
182       odiag = v + 49 * diag_offset[i];
183       PetscCall(PetscArraycpy(diag, odiag, 49));
184       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
185       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
186       diag += 49;
187     }
188     break;
189   default:
190     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
191     for (i = 0; i < mbs; i++) {
192       odiag = v + bs2 * diag_offset[i];
193       PetscCall(PetscArraycpy(diag, odiag, bs2));
194       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
195       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
196       diag += bs2;
197     }
198     PetscCall(PetscFree2(v_work, v_pivots));
199   }
200   a->idiagvalid = PETSC_TRUE;
201   PetscFunctionReturn(PETSC_SUCCESS);
202 }
203 
204 PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
205 {
206   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
207   PetscScalar       *x, *work, *w, *workt, *t;
208   const MatScalar   *v, *aa = a->a, *idiag;
209   const PetscScalar *b, *xb;
210   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
211   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
212   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
213 
214   PetscFunctionBegin;
215   its = its * lits;
216   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
217   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
218   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
219   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
220   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
221 
222   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
223 
224   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
225   diag  = a->diag;
226   idiag = a->idiag;
227   k     = PetscMax(A->rmap->n, A->cmap->n);
228   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
229   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
230   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
231   work = a->mult_work;
232   t    = a->sor_workt;
233   w    = a->sor_work;
234 
235   PetscCall(VecGetArray(xx, &x));
236   PetscCall(VecGetArrayRead(bb, &b));
237 
238   if (flag & SOR_ZERO_INITIAL_GUESS) {
239     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
240       switch (bs) {
241       case 1:
242         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
243         t[0] = b[0];
244         i2   = 1;
245         idiag += 1;
246         for (i = 1; i < m; i++) {
247           v    = aa + ai[i];
248           vi   = aj + ai[i];
249           nz   = diag[i] - ai[i];
250           s[0] = b[i2];
251           for (j = 0; j < nz; j++) {
252             xw[0] = x[vi[j]];
253             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
254           }
255           t[i2] = s[0];
256           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
257           x[i2] = xw[0];
258           idiag += 1;
259           i2 += 1;
260         }
261         break;
262       case 2:
263         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
264         t[0] = b[0];
265         t[1] = b[1];
266         i2   = 2;
267         idiag += 4;
268         for (i = 1; i < m; i++) {
269           v    = aa + 4 * ai[i];
270           vi   = aj + ai[i];
271           nz   = diag[i] - ai[i];
272           s[0] = b[i2];
273           s[1] = b[i2 + 1];
274           for (j = 0; j < nz; j++) {
275             idx   = 2 * vi[j];
276             it    = 4 * j;
277             xw[0] = x[idx];
278             xw[1] = x[1 + idx];
279             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
280           }
281           t[i2]     = s[0];
282           t[i2 + 1] = s[1];
283           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
284           x[i2]     = xw[0];
285           x[i2 + 1] = xw[1];
286           idiag += 4;
287           i2 += 2;
288         }
289         break;
290       case 3:
291         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
292         t[0] = b[0];
293         t[1] = b[1];
294         t[2] = b[2];
295         i2   = 3;
296         idiag += 9;
297         for (i = 1; i < m; i++) {
298           v    = aa + 9 * ai[i];
299           vi   = aj + ai[i];
300           nz   = diag[i] - ai[i];
301           s[0] = b[i2];
302           s[1] = b[i2 + 1];
303           s[2] = b[i2 + 2];
304           while (nz--) {
305             idx   = 3 * (*vi++);
306             xw[0] = x[idx];
307             xw[1] = x[1 + idx];
308             xw[2] = x[2 + idx];
309             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
310             v += 9;
311           }
312           t[i2]     = s[0];
313           t[i2 + 1] = s[1];
314           t[i2 + 2] = s[2];
315           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
316           x[i2]     = xw[0];
317           x[i2 + 1] = xw[1];
318           x[i2 + 2] = xw[2];
319           idiag += 9;
320           i2 += 3;
321         }
322         break;
323       case 4:
324         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
325         t[0] = b[0];
326         t[1] = b[1];
327         t[2] = b[2];
328         t[3] = b[3];
329         i2   = 4;
330         idiag += 16;
331         for (i = 1; i < m; i++) {
332           v    = aa + 16 * ai[i];
333           vi   = aj + ai[i];
334           nz   = diag[i] - ai[i];
335           s[0] = b[i2];
336           s[1] = b[i2 + 1];
337           s[2] = b[i2 + 2];
338           s[3] = b[i2 + 3];
339           while (nz--) {
340             idx   = 4 * (*vi++);
341             xw[0] = x[idx];
342             xw[1] = x[1 + idx];
343             xw[2] = x[2 + idx];
344             xw[3] = x[3 + idx];
345             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
346             v += 16;
347           }
348           t[i2]     = s[0];
349           t[i2 + 1] = s[1];
350           t[i2 + 2] = s[2];
351           t[i2 + 3] = s[3];
352           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
353           x[i2]     = xw[0];
354           x[i2 + 1] = xw[1];
355           x[i2 + 2] = xw[2];
356           x[i2 + 3] = xw[3];
357           idiag += 16;
358           i2 += 4;
359         }
360         break;
361       case 5:
362         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
363         t[0] = b[0];
364         t[1] = b[1];
365         t[2] = b[2];
366         t[3] = b[3];
367         t[4] = b[4];
368         i2   = 5;
369         idiag += 25;
370         for (i = 1; i < m; i++) {
371           v    = aa + 25 * ai[i];
372           vi   = aj + ai[i];
373           nz   = diag[i] - ai[i];
374           s[0] = b[i2];
375           s[1] = b[i2 + 1];
376           s[2] = b[i2 + 2];
377           s[3] = b[i2 + 3];
378           s[4] = b[i2 + 4];
379           while (nz--) {
380             idx   = 5 * (*vi++);
381             xw[0] = x[idx];
382             xw[1] = x[1 + idx];
383             xw[2] = x[2 + idx];
384             xw[3] = x[3 + idx];
385             xw[4] = x[4 + idx];
386             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
387             v += 25;
388           }
389           t[i2]     = s[0];
390           t[i2 + 1] = s[1];
391           t[i2 + 2] = s[2];
392           t[i2 + 3] = s[3];
393           t[i2 + 4] = s[4];
394           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
395           x[i2]     = xw[0];
396           x[i2 + 1] = xw[1];
397           x[i2 + 2] = xw[2];
398           x[i2 + 3] = xw[3];
399           x[i2 + 4] = xw[4];
400           idiag += 25;
401           i2 += 5;
402         }
403         break;
404       case 6:
405         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
406         t[0] = b[0];
407         t[1] = b[1];
408         t[2] = b[2];
409         t[3] = b[3];
410         t[4] = b[4];
411         t[5] = b[5];
412         i2   = 6;
413         idiag += 36;
414         for (i = 1; i < m; i++) {
415           v    = aa + 36 * ai[i];
416           vi   = aj + ai[i];
417           nz   = diag[i] - ai[i];
418           s[0] = b[i2];
419           s[1] = b[i2 + 1];
420           s[2] = b[i2 + 2];
421           s[3] = b[i2 + 3];
422           s[4] = b[i2 + 4];
423           s[5] = b[i2 + 5];
424           while (nz--) {
425             idx   = 6 * (*vi++);
426             xw[0] = x[idx];
427             xw[1] = x[1 + idx];
428             xw[2] = x[2 + idx];
429             xw[3] = x[3 + idx];
430             xw[4] = x[4 + idx];
431             xw[5] = x[5 + idx];
432             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
433             v += 36;
434           }
435           t[i2]     = s[0];
436           t[i2 + 1] = s[1];
437           t[i2 + 2] = s[2];
438           t[i2 + 3] = s[3];
439           t[i2 + 4] = s[4];
440           t[i2 + 5] = s[5];
441           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
442           x[i2]     = xw[0];
443           x[i2 + 1] = xw[1];
444           x[i2 + 2] = xw[2];
445           x[i2 + 3] = xw[3];
446           x[i2 + 4] = xw[4];
447           x[i2 + 5] = xw[5];
448           idiag += 36;
449           i2 += 6;
450         }
451         break;
452       case 7:
453         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
454         t[0] = b[0];
455         t[1] = b[1];
456         t[2] = b[2];
457         t[3] = b[3];
458         t[4] = b[4];
459         t[5] = b[5];
460         t[6] = b[6];
461         i2   = 7;
462         idiag += 49;
463         for (i = 1; i < m; i++) {
464           v    = aa + 49 * ai[i];
465           vi   = aj + ai[i];
466           nz   = diag[i] - ai[i];
467           s[0] = b[i2];
468           s[1] = b[i2 + 1];
469           s[2] = b[i2 + 2];
470           s[3] = b[i2 + 3];
471           s[4] = b[i2 + 4];
472           s[5] = b[i2 + 5];
473           s[6] = b[i2 + 6];
474           while (nz--) {
475             idx   = 7 * (*vi++);
476             xw[0] = x[idx];
477             xw[1] = x[1 + idx];
478             xw[2] = x[2 + idx];
479             xw[3] = x[3 + idx];
480             xw[4] = x[4 + idx];
481             xw[5] = x[5 + idx];
482             xw[6] = x[6 + idx];
483             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
484             v += 49;
485           }
486           t[i2]     = s[0];
487           t[i2 + 1] = s[1];
488           t[i2 + 2] = s[2];
489           t[i2 + 3] = s[3];
490           t[i2 + 4] = s[4];
491           t[i2 + 5] = s[5];
492           t[i2 + 6] = s[6];
493           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
494           x[i2]     = xw[0];
495           x[i2 + 1] = xw[1];
496           x[i2 + 2] = xw[2];
497           x[i2 + 3] = xw[3];
498           x[i2 + 4] = xw[4];
499           x[i2 + 5] = xw[5];
500           x[i2 + 6] = xw[6];
501           idiag += 49;
502           i2 += 7;
503         }
504         break;
505       default:
506         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
507         PetscCall(PetscArraycpy(t, b, bs));
508         i2 = bs;
509         idiag += bs2;
510         for (i = 1; i < m; i++) {
511           v  = aa + bs2 * ai[i];
512           vi = aj + ai[i];
513           nz = diag[i] - ai[i];
514 
515           PetscCall(PetscArraycpy(w, b + i2, bs));
516           /* copy all rows of x that are needed into contiguous space */
517           workt = work;
518           for (j = 0; j < nz; j++) {
519             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
520             workt += bs;
521           }
522           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
523           PetscCall(PetscArraycpy(t + i2, w, bs));
524           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
525 
526           idiag += bs2;
527           i2 += bs;
528         }
529         break;
530       }
531       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
532       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
533       xb = t;
534     } else xb = b;
535     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
536       idiag = a->idiag + bs2 * (a->mbs - 1);
537       i2    = bs * (m - 1);
538       switch (bs) {
539       case 1:
540         s[0] = xb[i2];
541         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
542         x[i2] = xw[0];
543         i2 -= 1;
544         for (i = m - 2; i >= 0; i--) {
545           v    = aa + (diag[i] + 1);
546           vi   = aj + diag[i] + 1;
547           nz   = ai[i + 1] - diag[i] - 1;
548           s[0] = xb[i2];
549           for (j = 0; j < nz; j++) {
550             xw[0] = x[vi[j]];
551             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
552           }
553           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
554           x[i2] = xw[0];
555           idiag -= 1;
556           i2 -= 1;
557         }
558         break;
559       case 2:
560         s[0] = xb[i2];
561         s[1] = xb[i2 + 1];
562         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
563         x[i2]     = xw[0];
564         x[i2 + 1] = xw[1];
565         i2 -= 2;
566         idiag -= 4;
567         for (i = m - 2; i >= 0; i--) {
568           v    = aa + 4 * (diag[i] + 1);
569           vi   = aj + diag[i] + 1;
570           nz   = ai[i + 1] - diag[i] - 1;
571           s[0] = xb[i2];
572           s[1] = xb[i2 + 1];
573           for (j = 0; j < nz; j++) {
574             idx   = 2 * vi[j];
575             it    = 4 * j;
576             xw[0] = x[idx];
577             xw[1] = x[1 + idx];
578             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
579           }
580           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
581           x[i2]     = xw[0];
582           x[i2 + 1] = xw[1];
583           idiag -= 4;
584           i2 -= 2;
585         }
586         break;
587       case 3:
588         s[0] = xb[i2];
589         s[1] = xb[i2 + 1];
590         s[2] = xb[i2 + 2];
591         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
592         x[i2]     = xw[0];
593         x[i2 + 1] = xw[1];
594         x[i2 + 2] = xw[2];
595         i2 -= 3;
596         idiag -= 9;
597         for (i = m - 2; i >= 0; i--) {
598           v    = aa + 9 * (diag[i] + 1);
599           vi   = aj + diag[i] + 1;
600           nz   = ai[i + 1] - diag[i] - 1;
601           s[0] = xb[i2];
602           s[1] = xb[i2 + 1];
603           s[2] = xb[i2 + 2];
604           while (nz--) {
605             idx   = 3 * (*vi++);
606             xw[0] = x[idx];
607             xw[1] = x[1 + idx];
608             xw[2] = x[2 + idx];
609             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
610             v += 9;
611           }
612           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
613           x[i2]     = xw[0];
614           x[i2 + 1] = xw[1];
615           x[i2 + 2] = xw[2];
616           idiag -= 9;
617           i2 -= 3;
618         }
619         break;
620       case 4:
621         s[0] = xb[i2];
622         s[1] = xb[i2 + 1];
623         s[2] = xb[i2 + 2];
624         s[3] = xb[i2 + 3];
625         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
626         x[i2]     = xw[0];
627         x[i2 + 1] = xw[1];
628         x[i2 + 2] = xw[2];
629         x[i2 + 3] = xw[3];
630         i2 -= 4;
631         idiag -= 16;
632         for (i = m - 2; i >= 0; i--) {
633           v    = aa + 16 * (diag[i] + 1);
634           vi   = aj + diag[i] + 1;
635           nz   = ai[i + 1] - diag[i] - 1;
636           s[0] = xb[i2];
637           s[1] = xb[i2 + 1];
638           s[2] = xb[i2 + 2];
639           s[3] = xb[i2 + 3];
640           while (nz--) {
641             idx   = 4 * (*vi++);
642             xw[0] = x[idx];
643             xw[1] = x[1 + idx];
644             xw[2] = x[2 + idx];
645             xw[3] = x[3 + idx];
646             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
647             v += 16;
648           }
649           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
650           x[i2]     = xw[0];
651           x[i2 + 1] = xw[1];
652           x[i2 + 2] = xw[2];
653           x[i2 + 3] = xw[3];
654           idiag -= 16;
655           i2 -= 4;
656         }
657         break;
658       case 5:
659         s[0] = xb[i2];
660         s[1] = xb[i2 + 1];
661         s[2] = xb[i2 + 2];
662         s[3] = xb[i2 + 3];
663         s[4] = xb[i2 + 4];
664         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
665         x[i2]     = xw[0];
666         x[i2 + 1] = xw[1];
667         x[i2 + 2] = xw[2];
668         x[i2 + 3] = xw[3];
669         x[i2 + 4] = xw[4];
670         i2 -= 5;
671         idiag -= 25;
672         for (i = m - 2; i >= 0; i--) {
673           v    = aa + 25 * (diag[i] + 1);
674           vi   = aj + diag[i] + 1;
675           nz   = ai[i + 1] - diag[i] - 1;
676           s[0] = xb[i2];
677           s[1] = xb[i2 + 1];
678           s[2] = xb[i2 + 2];
679           s[3] = xb[i2 + 3];
680           s[4] = xb[i2 + 4];
681           while (nz--) {
682             idx   = 5 * (*vi++);
683             xw[0] = x[idx];
684             xw[1] = x[1 + idx];
685             xw[2] = x[2 + idx];
686             xw[3] = x[3 + idx];
687             xw[4] = x[4 + idx];
688             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
689             v += 25;
690           }
691           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
692           x[i2]     = xw[0];
693           x[i2 + 1] = xw[1];
694           x[i2 + 2] = xw[2];
695           x[i2 + 3] = xw[3];
696           x[i2 + 4] = xw[4];
697           idiag -= 25;
698           i2 -= 5;
699         }
700         break;
701       case 6:
702         s[0] = xb[i2];
703         s[1] = xb[i2 + 1];
704         s[2] = xb[i2 + 2];
705         s[3] = xb[i2 + 3];
706         s[4] = xb[i2 + 4];
707         s[5] = xb[i2 + 5];
708         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
709         x[i2]     = xw[0];
710         x[i2 + 1] = xw[1];
711         x[i2 + 2] = xw[2];
712         x[i2 + 3] = xw[3];
713         x[i2 + 4] = xw[4];
714         x[i2 + 5] = xw[5];
715         i2 -= 6;
716         idiag -= 36;
717         for (i = m - 2; i >= 0; i--) {
718           v    = aa + 36 * (diag[i] + 1);
719           vi   = aj + diag[i] + 1;
720           nz   = ai[i + 1] - diag[i] - 1;
721           s[0] = xb[i2];
722           s[1] = xb[i2 + 1];
723           s[2] = xb[i2 + 2];
724           s[3] = xb[i2 + 3];
725           s[4] = xb[i2 + 4];
726           s[5] = xb[i2 + 5];
727           while (nz--) {
728             idx   = 6 * (*vi++);
729             xw[0] = x[idx];
730             xw[1] = x[1 + idx];
731             xw[2] = x[2 + idx];
732             xw[3] = x[3 + idx];
733             xw[4] = x[4 + idx];
734             xw[5] = x[5 + idx];
735             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
736             v += 36;
737           }
738           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
739           x[i2]     = xw[0];
740           x[i2 + 1] = xw[1];
741           x[i2 + 2] = xw[2];
742           x[i2 + 3] = xw[3];
743           x[i2 + 4] = xw[4];
744           x[i2 + 5] = xw[5];
745           idiag -= 36;
746           i2 -= 6;
747         }
748         break;
749       case 7:
750         s[0] = xb[i2];
751         s[1] = xb[i2 + 1];
752         s[2] = xb[i2 + 2];
753         s[3] = xb[i2 + 3];
754         s[4] = xb[i2 + 4];
755         s[5] = xb[i2 + 5];
756         s[6] = xb[i2 + 6];
757         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
758         x[i2]     = xw[0];
759         x[i2 + 1] = xw[1];
760         x[i2 + 2] = xw[2];
761         x[i2 + 3] = xw[3];
762         x[i2 + 4] = xw[4];
763         x[i2 + 5] = xw[5];
764         x[i2 + 6] = xw[6];
765         i2 -= 7;
766         idiag -= 49;
767         for (i = m - 2; i >= 0; i--) {
768           v    = aa + 49 * (diag[i] + 1);
769           vi   = aj + diag[i] + 1;
770           nz   = ai[i + 1] - diag[i] - 1;
771           s[0] = xb[i2];
772           s[1] = xb[i2 + 1];
773           s[2] = xb[i2 + 2];
774           s[3] = xb[i2 + 3];
775           s[4] = xb[i2 + 4];
776           s[5] = xb[i2 + 5];
777           s[6] = xb[i2 + 6];
778           while (nz--) {
779             idx   = 7 * (*vi++);
780             xw[0] = x[idx];
781             xw[1] = x[1 + idx];
782             xw[2] = x[2 + idx];
783             xw[3] = x[3 + idx];
784             xw[4] = x[4 + idx];
785             xw[5] = x[5 + idx];
786             xw[6] = x[6 + idx];
787             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
788             v += 49;
789           }
790           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
791           x[i2]     = xw[0];
792           x[i2 + 1] = xw[1];
793           x[i2 + 2] = xw[2];
794           x[i2 + 3] = xw[3];
795           x[i2 + 4] = xw[4];
796           x[i2 + 5] = xw[5];
797           x[i2 + 6] = xw[6];
798           idiag -= 49;
799           i2 -= 7;
800         }
801         break;
802       default:
803         PetscCall(PetscArraycpy(w, xb + i2, bs));
804         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
805         i2 -= bs;
806         idiag -= bs2;
807         for (i = m - 2; i >= 0; i--) {
808           v  = aa + bs2 * (diag[i] + 1);
809           vi = aj + diag[i] + 1;
810           nz = ai[i + 1] - diag[i] - 1;
811 
812           PetscCall(PetscArraycpy(w, xb + i2, bs));
813           /* copy all rows of x that are needed into contiguous space */
814           workt = work;
815           for (j = 0; j < nz; j++) {
816             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
817             workt += bs;
818           }
819           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
820           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
821 
822           idiag -= bs2;
823           i2 -= bs;
824         }
825         break;
826       }
827       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
828     }
829     its--;
830   }
831   while (its--) {
832     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
833       idiag = a->idiag;
834       i2    = 0;
835       switch (bs) {
836       case 1:
837         for (i = 0; i < m; i++) {
838           v    = aa + ai[i];
839           vi   = aj + ai[i];
840           nz   = ai[i + 1] - ai[i];
841           s[0] = b[i2];
842           for (j = 0; j < nz; j++) {
843             xw[0] = x[vi[j]];
844             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
845           }
846           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
847           x[i2] += xw[0];
848           idiag += 1;
849           i2 += 1;
850         }
851         break;
852       case 2:
853         for (i = 0; i < m; i++) {
854           v    = aa + 4 * ai[i];
855           vi   = aj + ai[i];
856           nz   = ai[i + 1] - ai[i];
857           s[0] = b[i2];
858           s[1] = b[i2 + 1];
859           for (j = 0; j < nz; j++) {
860             idx   = 2 * vi[j];
861             it    = 4 * j;
862             xw[0] = x[idx];
863             xw[1] = x[1 + idx];
864             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
865           }
866           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
867           x[i2] += xw[0];
868           x[i2 + 1] += xw[1];
869           idiag += 4;
870           i2 += 2;
871         }
872         break;
873       case 3:
874         for (i = 0; i < m; i++) {
875           v    = aa + 9 * ai[i];
876           vi   = aj + ai[i];
877           nz   = ai[i + 1] - ai[i];
878           s[0] = b[i2];
879           s[1] = b[i2 + 1];
880           s[2] = b[i2 + 2];
881           while (nz--) {
882             idx   = 3 * (*vi++);
883             xw[0] = x[idx];
884             xw[1] = x[1 + idx];
885             xw[2] = x[2 + idx];
886             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
887             v += 9;
888           }
889           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
890           x[i2] += xw[0];
891           x[i2 + 1] += xw[1];
892           x[i2 + 2] += xw[2];
893           idiag += 9;
894           i2 += 3;
895         }
896         break;
897       case 4:
898         for (i = 0; i < m; i++) {
899           v    = aa + 16 * ai[i];
900           vi   = aj + ai[i];
901           nz   = ai[i + 1] - ai[i];
902           s[0] = b[i2];
903           s[1] = b[i2 + 1];
904           s[2] = b[i2 + 2];
905           s[3] = b[i2 + 3];
906           while (nz--) {
907             idx   = 4 * (*vi++);
908             xw[0] = x[idx];
909             xw[1] = x[1 + idx];
910             xw[2] = x[2 + idx];
911             xw[3] = x[3 + idx];
912             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
913             v += 16;
914           }
915           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
916           x[i2] += xw[0];
917           x[i2 + 1] += xw[1];
918           x[i2 + 2] += xw[2];
919           x[i2 + 3] += xw[3];
920           idiag += 16;
921           i2 += 4;
922         }
923         break;
924       case 5:
925         for (i = 0; i < m; i++) {
926           v    = aa + 25 * ai[i];
927           vi   = aj + ai[i];
928           nz   = ai[i + 1] - ai[i];
929           s[0] = b[i2];
930           s[1] = b[i2 + 1];
931           s[2] = b[i2 + 2];
932           s[3] = b[i2 + 3];
933           s[4] = b[i2 + 4];
934           while (nz--) {
935             idx   = 5 * (*vi++);
936             xw[0] = x[idx];
937             xw[1] = x[1 + idx];
938             xw[2] = x[2 + idx];
939             xw[3] = x[3 + idx];
940             xw[4] = x[4 + idx];
941             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
942             v += 25;
943           }
944           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
945           x[i2] += xw[0];
946           x[i2 + 1] += xw[1];
947           x[i2 + 2] += xw[2];
948           x[i2 + 3] += xw[3];
949           x[i2 + 4] += xw[4];
950           idiag += 25;
951           i2 += 5;
952         }
953         break;
954       case 6:
955         for (i = 0; i < m; i++) {
956           v    = aa + 36 * ai[i];
957           vi   = aj + ai[i];
958           nz   = ai[i + 1] - ai[i];
959           s[0] = b[i2];
960           s[1] = b[i2 + 1];
961           s[2] = b[i2 + 2];
962           s[3] = b[i2 + 3];
963           s[4] = b[i2 + 4];
964           s[5] = b[i2 + 5];
965           while (nz--) {
966             idx   = 6 * (*vi++);
967             xw[0] = x[idx];
968             xw[1] = x[1 + idx];
969             xw[2] = x[2 + idx];
970             xw[3] = x[3 + idx];
971             xw[4] = x[4 + idx];
972             xw[5] = x[5 + idx];
973             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
974             v += 36;
975           }
976           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
977           x[i2] += xw[0];
978           x[i2 + 1] += xw[1];
979           x[i2 + 2] += xw[2];
980           x[i2 + 3] += xw[3];
981           x[i2 + 4] += xw[4];
982           x[i2 + 5] += xw[5];
983           idiag += 36;
984           i2 += 6;
985         }
986         break;
987       case 7:
988         for (i = 0; i < m; i++) {
989           v    = aa + 49 * ai[i];
990           vi   = aj + ai[i];
991           nz   = ai[i + 1] - ai[i];
992           s[0] = b[i2];
993           s[1] = b[i2 + 1];
994           s[2] = b[i2 + 2];
995           s[3] = b[i2 + 3];
996           s[4] = b[i2 + 4];
997           s[5] = b[i2 + 5];
998           s[6] = b[i2 + 6];
999           while (nz--) {
1000             idx   = 7 * (*vi++);
1001             xw[0] = x[idx];
1002             xw[1] = x[1 + idx];
1003             xw[2] = x[2 + idx];
1004             xw[3] = x[3 + idx];
1005             xw[4] = x[4 + idx];
1006             xw[5] = x[5 + idx];
1007             xw[6] = x[6 + idx];
1008             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1009             v += 49;
1010           }
1011           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1012           x[i2] += xw[0];
1013           x[i2 + 1] += xw[1];
1014           x[i2 + 2] += xw[2];
1015           x[i2 + 3] += xw[3];
1016           x[i2 + 4] += xw[4];
1017           x[i2 + 5] += xw[5];
1018           x[i2 + 6] += xw[6];
1019           idiag += 49;
1020           i2 += 7;
1021         }
1022         break;
1023       default:
1024         for (i = 0; i < m; i++) {
1025           v  = aa + bs2 * ai[i];
1026           vi = aj + ai[i];
1027           nz = ai[i + 1] - ai[i];
1028 
1029           PetscCall(PetscArraycpy(w, b + i2, bs));
1030           /* copy all rows of x that are needed into contiguous space */
1031           workt = work;
1032           for (j = 0; j < nz; j++) {
1033             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1034             workt += bs;
1035           }
1036           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1037           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1038 
1039           idiag += bs2;
1040           i2 += bs;
1041         }
1042         break;
1043       }
1044       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1045     }
1046     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1047       idiag = a->idiag + bs2 * (a->mbs - 1);
1048       i2    = bs * (m - 1);
1049       switch (bs) {
1050       case 1:
1051         for (i = m - 1; i >= 0; i--) {
1052           v    = aa + ai[i];
1053           vi   = aj + ai[i];
1054           nz   = ai[i + 1] - ai[i];
1055           s[0] = b[i2];
1056           for (j = 0; j < nz; j++) {
1057             xw[0] = x[vi[j]];
1058             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1059           }
1060           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1061           x[i2] += xw[0];
1062           idiag -= 1;
1063           i2 -= 1;
1064         }
1065         break;
1066       case 2:
1067         for (i = m - 1; i >= 0; i--) {
1068           v    = aa + 4 * ai[i];
1069           vi   = aj + ai[i];
1070           nz   = ai[i + 1] - ai[i];
1071           s[0] = b[i2];
1072           s[1] = b[i2 + 1];
1073           for (j = 0; j < nz; j++) {
1074             idx   = 2 * vi[j];
1075             it    = 4 * j;
1076             xw[0] = x[idx];
1077             xw[1] = x[1 + idx];
1078             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1079           }
1080           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
1081           x[i2] += xw[0];
1082           x[i2 + 1] += xw[1];
1083           idiag -= 4;
1084           i2 -= 2;
1085         }
1086         break;
1087       case 3:
1088         for (i = m - 1; i >= 0; i--) {
1089           v    = aa + 9 * ai[i];
1090           vi   = aj + ai[i];
1091           nz   = ai[i + 1] - ai[i];
1092           s[0] = b[i2];
1093           s[1] = b[i2 + 1];
1094           s[2] = b[i2 + 2];
1095           while (nz--) {
1096             idx   = 3 * (*vi++);
1097             xw[0] = x[idx];
1098             xw[1] = x[1 + idx];
1099             xw[2] = x[2 + idx];
1100             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1101             v += 9;
1102           }
1103           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
1104           x[i2] += xw[0];
1105           x[i2 + 1] += xw[1];
1106           x[i2 + 2] += xw[2];
1107           idiag -= 9;
1108           i2 -= 3;
1109         }
1110         break;
1111       case 4:
1112         for (i = m - 1; i >= 0; i--) {
1113           v    = aa + 16 * ai[i];
1114           vi   = aj + ai[i];
1115           nz   = ai[i + 1] - ai[i];
1116           s[0] = b[i2];
1117           s[1] = b[i2 + 1];
1118           s[2] = b[i2 + 2];
1119           s[3] = b[i2 + 3];
1120           while (nz--) {
1121             idx   = 4 * (*vi++);
1122             xw[0] = x[idx];
1123             xw[1] = x[1 + idx];
1124             xw[2] = x[2 + idx];
1125             xw[3] = x[3 + idx];
1126             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1127             v += 16;
1128           }
1129           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
1130           x[i2] += xw[0];
1131           x[i2 + 1] += xw[1];
1132           x[i2 + 2] += xw[2];
1133           x[i2 + 3] += xw[3];
1134           idiag -= 16;
1135           i2 -= 4;
1136         }
1137         break;
1138       case 5:
1139         for (i = m - 1; i >= 0; i--) {
1140           v    = aa + 25 * ai[i];
1141           vi   = aj + ai[i];
1142           nz   = ai[i + 1] - ai[i];
1143           s[0] = b[i2];
1144           s[1] = b[i2 + 1];
1145           s[2] = b[i2 + 2];
1146           s[3] = b[i2 + 3];
1147           s[4] = b[i2 + 4];
1148           while (nz--) {
1149             idx   = 5 * (*vi++);
1150             xw[0] = x[idx];
1151             xw[1] = x[1 + idx];
1152             xw[2] = x[2 + idx];
1153             xw[3] = x[3 + idx];
1154             xw[4] = x[4 + idx];
1155             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1156             v += 25;
1157           }
1158           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
1159           x[i2] += xw[0];
1160           x[i2 + 1] += xw[1];
1161           x[i2 + 2] += xw[2];
1162           x[i2 + 3] += xw[3];
1163           x[i2 + 4] += xw[4];
1164           idiag -= 25;
1165           i2 -= 5;
1166         }
1167         break;
1168       case 6:
1169         for (i = m - 1; i >= 0; i--) {
1170           v    = aa + 36 * ai[i];
1171           vi   = aj + ai[i];
1172           nz   = ai[i + 1] - ai[i];
1173           s[0] = b[i2];
1174           s[1] = b[i2 + 1];
1175           s[2] = b[i2 + 2];
1176           s[3] = b[i2 + 3];
1177           s[4] = b[i2 + 4];
1178           s[5] = b[i2 + 5];
1179           while (nz--) {
1180             idx   = 6 * (*vi++);
1181             xw[0] = x[idx];
1182             xw[1] = x[1 + idx];
1183             xw[2] = x[2 + idx];
1184             xw[3] = x[3 + idx];
1185             xw[4] = x[4 + idx];
1186             xw[5] = x[5 + idx];
1187             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1188             v += 36;
1189           }
1190           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
1191           x[i2] += xw[0];
1192           x[i2 + 1] += xw[1];
1193           x[i2 + 2] += xw[2];
1194           x[i2 + 3] += xw[3];
1195           x[i2 + 4] += xw[4];
1196           x[i2 + 5] += xw[5];
1197           idiag -= 36;
1198           i2 -= 6;
1199         }
1200         break;
1201       case 7:
1202         for (i = m - 1; i >= 0; i--) {
1203           v    = aa + 49 * ai[i];
1204           vi   = aj + ai[i];
1205           nz   = ai[i + 1] - ai[i];
1206           s[0] = b[i2];
1207           s[1] = b[i2 + 1];
1208           s[2] = b[i2 + 2];
1209           s[3] = b[i2 + 3];
1210           s[4] = b[i2 + 4];
1211           s[5] = b[i2 + 5];
1212           s[6] = b[i2 + 6];
1213           while (nz--) {
1214             idx   = 7 * (*vi++);
1215             xw[0] = x[idx];
1216             xw[1] = x[1 + idx];
1217             xw[2] = x[2 + idx];
1218             xw[3] = x[3 + idx];
1219             xw[4] = x[4 + idx];
1220             xw[5] = x[5 + idx];
1221             xw[6] = x[6 + idx];
1222             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1223             v += 49;
1224           }
1225           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1226           x[i2] += xw[0];
1227           x[i2 + 1] += xw[1];
1228           x[i2 + 2] += xw[2];
1229           x[i2 + 3] += xw[3];
1230           x[i2 + 4] += xw[4];
1231           x[i2 + 5] += xw[5];
1232           x[i2 + 6] += xw[6];
1233           idiag -= 49;
1234           i2 -= 7;
1235         }
1236         break;
1237       default:
1238         for (i = m - 1; i >= 0; i--) {
1239           v  = aa + bs2 * ai[i];
1240           vi = aj + ai[i];
1241           nz = ai[i + 1] - ai[i];
1242 
1243           PetscCall(PetscArraycpy(w, b + i2, bs));
1244           /* copy all rows of x that are needed into contiguous space */
1245           workt = work;
1246           for (j = 0; j < nz; j++) {
1247             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1248             workt += bs;
1249           }
1250           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1251           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1252 
1253           idiag -= bs2;
1254           i2 -= bs;
1255         }
1256         break;
1257       }
1258       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1259     }
1260   }
1261   PetscCall(VecRestoreArray(xx, &x));
1262   PetscCall(VecRestoreArrayRead(bb, &b));
1263   PetscFunctionReturn(PETSC_SUCCESS);
1264 }
1265 
1266 /*
1267     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1268 */
1269 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1270   #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1271 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1272   #define matsetvaluesblocked4_ matsetvaluesblocked4
1273 #endif
1274 
1275 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1276 {
1277   Mat                A = *AA;
1278   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1279   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1280   PetscInt          *ai = a->i, *ailen = a->ilen;
1281   PetscInt          *aj = a->j, stepval, lastcol = -1;
1282   const PetscScalar *value = v;
1283   MatScalar         *ap, *aa = a->a, *bap;
1284 
1285   PetscFunctionBegin;
1286   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1287   stepval = (n - 1) * 4;
1288   for (k = 0; k < m; k++) { /* loop over added rows */
1289     row  = im[k];
1290     rp   = aj + ai[row];
1291     ap   = aa + 16 * ai[row];
1292     nrow = ailen[row];
1293     low  = 0;
1294     high = nrow;
1295     for (l = 0; l < n; l++) { /* loop over added columns */
1296       col = in[l];
1297       if (col <= lastcol) low = 0;
1298       else high = nrow;
1299       lastcol = col;
1300       value   = v + k * (stepval + 4 + l) * 4;
1301       while (high - low > 7) {
1302         t = (low + high) / 2;
1303         if (rp[t] > col) high = t;
1304         else low = t;
1305       }
1306       for (i = low; i < high; i++) {
1307         if (rp[i] > col) break;
1308         if (rp[i] == col) {
1309           bap = ap + 16 * i;
1310           for (ii = 0; ii < 4; ii++, value += stepval) {
1311             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1312           }
1313           goto noinsert2;
1314         }
1315       }
1316       N = nrow++ - 1;
1317       high++; /* added new column index thus must search to one higher than before */
1318       /* shift up all the later entries in this row */
1319       for (ii = N; ii >= i; ii--) {
1320         rp[ii + 1] = rp[ii];
1321         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1322       }
1323       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1324       rp[i] = col;
1325       bap   = ap + 16 * i;
1326       for (ii = 0; ii < 4; ii++, value += stepval) {
1327         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1328       }
1329     noinsert2:;
1330       low = i;
1331     }
1332     ailen[row] = nrow;
1333   }
1334   PetscFunctionReturnVoid();
1335 }
1336 
1337 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1338   #define matsetvalues4_ MATSETVALUES4
1339 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1340   #define matsetvalues4_ matsetvalues4
1341 #endif
1342 
1343 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1344 {
1345   Mat          A = *AA;
1346   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1347   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1348   PetscInt    *ai = a->i, *ailen = a->ilen;
1349   PetscInt    *aj = a->j, brow, bcol;
1350   PetscInt     ridx, cidx, lastcol = -1;
1351   MatScalar   *ap, value, *aa      = a->a, *bap;
1352 
1353   PetscFunctionBegin;
1354   for (k = 0; k < m; k++) { /* loop over added rows */
1355     row  = im[k];
1356     brow = row / 4;
1357     rp   = aj + ai[brow];
1358     ap   = aa + 16 * ai[brow];
1359     nrow = ailen[brow];
1360     low  = 0;
1361     high = nrow;
1362     for (l = 0; l < n; l++) { /* loop over added columns */
1363       col   = in[l];
1364       bcol  = col / 4;
1365       ridx  = row % 4;
1366       cidx  = col % 4;
1367       value = v[l + k * n];
1368       if (col <= lastcol) low = 0;
1369       else high = nrow;
1370       lastcol = col;
1371       while (high - low > 7) {
1372         t = (low + high) / 2;
1373         if (rp[t] > bcol) high = t;
1374         else low = t;
1375       }
1376       for (i = low; i < high; i++) {
1377         if (rp[i] > bcol) break;
1378         if (rp[i] == bcol) {
1379           bap = ap + 16 * i + 4 * cidx + ridx;
1380           *bap += value;
1381           goto noinsert1;
1382         }
1383       }
1384       N = nrow++ - 1;
1385       high++; /* added new column thus must search to one higher than before */
1386       /* shift up all the later entries in this row */
1387       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
1388       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
1389       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1390       rp[i]                        = bcol;
1391       ap[16 * i + 4 * cidx + ridx] = value;
1392     noinsert1:;
1393       low = i;
1394     }
1395     ailen[brow] = nrow;
1396   }
1397   PetscFunctionReturnVoid();
1398 }
1399 
1400 /*
1401      Checks for missing diagonals
1402 */
1403 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1404 {
1405   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1406   PetscInt    *diag, *ii = a->i, i;
1407 
1408   PetscFunctionBegin;
1409   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
1410   *missing = PETSC_FALSE;
1411   if (A->rmap->n > 0 && !ii) {
1412     *missing = PETSC_TRUE;
1413     if (d) *d = 0;
1414     PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
1415   } else {
1416     PetscInt n;
1417     n    = PetscMin(a->mbs, a->nbs);
1418     diag = a->diag;
1419     for (i = 0; i < n; i++) {
1420       if (diag[i] >= ii[i + 1]) {
1421         *missing = PETSC_TRUE;
1422         if (d) *d = i;
1423         PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i));
1424         break;
1425       }
1426     }
1427   }
1428   PetscFunctionReturn(PETSC_SUCCESS);
1429 }
1430 
1431 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1432 {
1433   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1434   PetscInt     i, j, m = a->mbs;
1435 
1436   PetscFunctionBegin;
1437   if (!a->diag) {
1438     PetscCall(PetscMalloc1(m, &a->diag));
1439     a->free_diag = PETSC_TRUE;
1440   }
1441   for (i = 0; i < m; i++) {
1442     a->diag[i] = a->i[i + 1];
1443     for (j = a->i[i]; j < a->i[i + 1]; j++) {
1444       if (a->j[j] == i) {
1445         a->diag[i] = j;
1446         break;
1447       }
1448     }
1449   }
1450   PetscFunctionReturn(PETSC_SUCCESS);
1451 }
1452 
1453 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1454 {
1455   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1456   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
1457   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
1458 
1459   PetscFunctionBegin;
1460   *nn = n;
1461   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1462   if (symmetric) {
1463     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1464     nz = tia[n];
1465   } else {
1466     tia = a->i;
1467     tja = a->j;
1468   }
1469 
1470   if (!blockcompressed && bs > 1) {
1471     (*nn) *= bs;
1472     /* malloc & create the natural set of indices */
1473     PetscCall(PetscMalloc1((n + 1) * bs, ia));
1474     if (n) {
1475       (*ia)[0] = oshift;
1476       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
1477     }
1478 
1479     for (i = 1; i < n; i++) {
1480       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1481       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
1482     }
1483     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1484 
1485     if (inja) {
1486       PetscCall(PetscMalloc1(nz * bs * bs, ja));
1487       cnt = 0;
1488       for (i = 0; i < n; i++) {
1489         for (j = 0; j < bs; j++) {
1490           for (k = tia[i]; k < tia[i + 1]; k++) {
1491             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
1492           }
1493         }
1494       }
1495     }
1496 
1497     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1498       PetscCall(PetscFree(tia));
1499       PetscCall(PetscFree(tja));
1500     }
1501   } else if (oshift == 1) {
1502     if (symmetric) {
1503       nz = tia[A->rmap->n / bs];
1504       /*  add 1 to i and j indices */
1505       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1506       *ia = tia;
1507       if (ja) {
1508         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1509         *ja = tja;
1510       }
1511     } else {
1512       nz = a->i[A->rmap->n / bs];
1513       /* malloc space and  add 1 to i and j indices */
1514       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1515       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1516       if (ja) {
1517         PetscCall(PetscMalloc1(nz, ja));
1518         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1519       }
1520     }
1521   } else {
1522     *ia = tia;
1523     if (ja) *ja = tja;
1524   }
1525   PetscFunctionReturn(PETSC_SUCCESS);
1526 }
1527 
1528 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1529 {
1530   PetscFunctionBegin;
1531   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1532   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1533     PetscCall(PetscFree(*ia));
1534     if (ja) PetscCall(PetscFree(*ja));
1535   }
1536   PetscFunctionReturn(PETSC_SUCCESS);
1537 }
1538 
1539 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1540 {
1541   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1542 
1543   PetscFunctionBegin;
1544 #if defined(PETSC_USE_LOG)
1545   PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
1546 #endif
1547   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
1548   PetscCall(ISDestroy(&a->row));
1549   PetscCall(ISDestroy(&a->col));
1550   if (a->free_diag) PetscCall(PetscFree(a->diag));
1551   PetscCall(PetscFree(a->idiag));
1552   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
1553   PetscCall(PetscFree(a->solve_work));
1554   PetscCall(PetscFree(a->mult_work));
1555   PetscCall(PetscFree(a->sor_workt));
1556   PetscCall(PetscFree(a->sor_work));
1557   PetscCall(ISDestroy(&a->icol));
1558   PetscCall(PetscFree(a->saved_values));
1559   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1560 
1561   PetscCall(MatDestroy(&a->sbaijMat));
1562   PetscCall(MatDestroy(&a->parent));
1563   PetscCall(PetscFree(A->data));
1564 
1565   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
1566   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
1567   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
1568   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
1569   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
1570   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
1571   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
1572   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
1573   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
1574   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
1575   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
1576   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
1577 #if defined(PETSC_HAVE_HYPRE)
1578   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
1579 #endif
1580   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
1581   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1582   PetscFunctionReturn(PETSC_SUCCESS);
1583 }
1584 
1585 PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1586 {
1587   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1588 
1589   PetscFunctionBegin;
1590   switch (op) {
1591   case MAT_ROW_ORIENTED:
1592     a->roworiented = flg;
1593     break;
1594   case MAT_KEEP_NONZERO_PATTERN:
1595     a->keepnonzeropattern = flg;
1596     break;
1597   case MAT_NEW_NONZERO_LOCATIONS:
1598     a->nonew = (flg ? 0 : 1);
1599     break;
1600   case MAT_NEW_NONZERO_LOCATION_ERR:
1601     a->nonew = (flg ? -1 : 0);
1602     break;
1603   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1604     a->nonew = (flg ? -2 : 0);
1605     break;
1606   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1607     a->nounused = (flg ? -1 : 0);
1608     break;
1609   case MAT_FORCE_DIAGONAL_ENTRIES:
1610   case MAT_IGNORE_OFF_PROC_ENTRIES:
1611   case MAT_USE_HASH_TABLE:
1612   case MAT_SORTED_FULL:
1613     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1614     break;
1615   case MAT_SPD:
1616   case MAT_SYMMETRIC:
1617   case MAT_STRUCTURALLY_SYMMETRIC:
1618   case MAT_HERMITIAN:
1619   case MAT_SYMMETRY_ETERNAL:
1620   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1621   case MAT_SUBMAT_SINGLEIS:
1622   case MAT_STRUCTURE_ONLY:
1623   case MAT_SPD_ETERNAL:
1624     /* if the diagonal matrix is square it inherits some of the properties above */
1625     break;
1626   default:
1627     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1628   }
1629   PetscFunctionReturn(PETSC_SUCCESS);
1630 }
1631 
1632 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1633 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1634 {
1635   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
1636   MatScalar   *aa_i;
1637   PetscScalar *v_i;
1638 
1639   PetscFunctionBegin;
1640   bs  = A->rmap->bs;
1641   bs2 = bs * bs;
1642   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1643 
1644   bn  = row / bs; /* Block number */
1645   bp  = row % bs; /* Block Position */
1646   M   = ai[bn + 1] - ai[bn];
1647   *nz = bs * M;
1648 
1649   if (v) {
1650     *v = NULL;
1651     if (*nz) {
1652       PetscCall(PetscMalloc1(*nz, v));
1653       for (i = 0; i < M; i++) { /* for each block in the block row */
1654         v_i  = *v + i * bs;
1655         aa_i = aa + bs2 * (ai[bn] + i);
1656         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
1657       }
1658     }
1659   }
1660 
1661   if (idx) {
1662     *idx = NULL;
1663     if (*nz) {
1664       PetscCall(PetscMalloc1(*nz, idx));
1665       for (i = 0; i < M; i++) { /* for each block in the block row */
1666         idx_i = *idx + i * bs;
1667         itmp  = bs * aj[ai[bn] + i];
1668         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
1669       }
1670     }
1671   }
1672   PetscFunctionReturn(PETSC_SUCCESS);
1673 }
1674 
1675 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1676 {
1677   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1678 
1679   PetscFunctionBegin;
1680   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
1681   PetscFunctionReturn(PETSC_SUCCESS);
1682 }
1683 
1684 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1685 {
1686   PetscFunctionBegin;
1687   if (nz) *nz = 0;
1688   if (idx) PetscCall(PetscFree(*idx));
1689   if (v) PetscCall(PetscFree(*v));
1690   PetscFunctionReturn(PETSC_SUCCESS);
1691 }
1692 
1693 PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1694 {
1695   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
1696   Mat          C;
1697   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
1698   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
1699   MatScalar   *ata, *aa = a->a;
1700 
1701   PetscFunctionBegin;
1702   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1703   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1704   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1705     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1706 
1707     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
1708     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
1709     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
1710     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
1711 
1712     at  = (Mat_SeqBAIJ *)C->data;
1713     ati = at->i;
1714     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1715   } else {
1716     C   = *B;
1717     at  = (Mat_SeqBAIJ *)C->data;
1718     ati = at->i;
1719   }
1720 
1721   atj = at->j;
1722   ata = at->a;
1723 
1724   /* Copy ati into atfill so we have locations of the next free space in atj */
1725   PetscCall(PetscArraycpy(atfill, ati, nbs));
1726 
1727   /* Walk through A row-wise and mark nonzero entries of A^T. */
1728   for (i = 0; i < mbs; i++) {
1729     anzj = ai[i + 1] - ai[i];
1730     for (j = 0; j < anzj; j++) {
1731       atj[atfill[*aj]] = i;
1732       for (kr = 0; kr < bs; kr++) {
1733         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
1734       }
1735       atfill[*aj++] += 1;
1736     }
1737   }
1738   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1739   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1740 
1741   /* Clean up temporary space and complete requests. */
1742   PetscCall(PetscFree(atfill));
1743 
1744   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1745     PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1746     *B = C;
1747   } else {
1748     PetscCall(MatHeaderMerge(A, &C));
1749   }
1750   PetscFunctionReturn(PETSC_SUCCESS);
1751 }
1752 
1753 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1754 {
1755   Mat Btrans;
1756 
1757   PetscFunctionBegin;
1758   *f = PETSC_FALSE;
1759   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
1760   PetscCall(MatEqual_SeqBAIJ(B, Btrans, f));
1761   PetscCall(MatDestroy(&Btrans));
1762   PetscFunctionReturn(PETSC_SUCCESS);
1763 }
1764 
1765 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1766 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1767 {
1768   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1769   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1770   PetscInt    *rowlens, *colidxs;
1771   PetscScalar *matvals;
1772 
1773   PetscFunctionBegin;
1774   PetscCall(PetscViewerSetUp(viewer));
1775 
1776   M  = mat->rmap->N;
1777   N  = mat->cmap->N;
1778   m  = mat->rmap->n;
1779   bs = mat->rmap->bs;
1780   nz = bs * bs * A->nz;
1781 
1782   /* write matrix header */
1783   header[0] = MAT_FILE_CLASSID;
1784   header[1] = M;
1785   header[2] = N;
1786   header[3] = nz;
1787   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1788 
1789   /* store row lengths */
1790   PetscCall(PetscMalloc1(m, &rowlens));
1791   for (cnt = 0, i = 0; i < A->mbs; i++)
1792     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
1793   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
1794   PetscCall(PetscFree(rowlens));
1795 
1796   /* store column indices  */
1797   PetscCall(PetscMalloc1(nz, &colidxs));
1798   for (cnt = 0, i = 0; i < A->mbs; i++)
1799     for (k = 0; k < bs; k++)
1800       for (j = A->i[i]; j < A->i[i + 1]; j++)
1801         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
1802   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1803   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
1804   PetscCall(PetscFree(colidxs));
1805 
1806   /* store nonzero values */
1807   PetscCall(PetscMalloc1(nz, &matvals));
1808   for (cnt = 0, i = 0; i < A->mbs; i++)
1809     for (k = 0; k < bs; k++)
1810       for (j = A->i[i]; j < A->i[i + 1]; j++)
1811         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
1812   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1813   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
1814   PetscCall(PetscFree(matvals));
1815 
1816   /* write block size option to the viewer's .info file */
1817   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1818   PetscFunctionReturn(PETSC_SUCCESS);
1819 }
1820 
1821 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1822 {
1823   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1824   PetscInt     i, bs = A->rmap->bs, k;
1825 
1826   PetscFunctionBegin;
1827   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1828   for (i = 0; i < a->mbs; i++) {
1829     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
1830     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
1831     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1832   }
1833   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1834   PetscFunctionReturn(PETSC_SUCCESS);
1835 }
1836 
1837 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1838 {
1839   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1840   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1841   PetscViewerFormat format;
1842 
1843   PetscFunctionBegin;
1844   if (A->structure_only) {
1845     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
1846     PetscFunctionReturn(PETSC_SUCCESS);
1847   }
1848 
1849   PetscCall(PetscViewerGetFormat(viewer, &format));
1850   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1851     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1852   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1853     const char *matname;
1854     Mat         aij;
1855     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
1856     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
1857     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
1858     PetscCall(MatView(aij, viewer));
1859     PetscCall(MatDestroy(&aij));
1860   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1861     PetscFunctionReturn(PETSC_SUCCESS);
1862   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1863     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1864     for (i = 0; i < a->mbs; i++) {
1865       for (j = 0; j < bs; j++) {
1866         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1867         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1868           for (l = 0; l < bs; l++) {
1869 #if defined(PETSC_USE_COMPLEX)
1870             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1871               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1872             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1873               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1874             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1875               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1876             }
1877 #else
1878             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1879 #endif
1880           }
1881         }
1882         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1883       }
1884     }
1885     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1886   } else {
1887     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1888     for (i = 0; i < a->mbs; i++) {
1889       for (j = 0; j < bs; j++) {
1890         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1891         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1892           for (l = 0; l < bs; l++) {
1893 #if defined(PETSC_USE_COMPLEX)
1894             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
1895               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1896             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
1897               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1898             } else {
1899               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1900             }
1901 #else
1902             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1903 #endif
1904           }
1905         }
1906         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1907       }
1908     }
1909     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1910   }
1911   PetscCall(PetscViewerFlush(viewer));
1912   PetscFunctionReturn(PETSC_SUCCESS);
1913 }
1914 
1915 #include <petscdraw.h>
1916 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1917 {
1918   Mat               A = (Mat)Aa;
1919   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1920   PetscInt          row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2;
1921   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1922   MatScalar        *aa;
1923   PetscViewer       viewer;
1924   PetscViewerFormat format;
1925 
1926   PetscFunctionBegin;
1927   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1928   PetscCall(PetscViewerGetFormat(viewer, &format));
1929   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
1930 
1931   /* loop over matrix elements drawing boxes */
1932 
1933   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1934     PetscDrawCollectiveBegin(draw);
1935     /* Blue for negative, Cyan for zero and  Red for positive */
1936     color = PETSC_DRAW_BLUE;
1937     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1938       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1939         y_l = A->rmap->N - row - 1.0;
1940         y_r = y_l + 1.0;
1941         x_l = a->j[j] * bs;
1942         x_r = x_l + 1.0;
1943         aa  = a->a + j * bs2;
1944         for (k = 0; k < bs; k++) {
1945           for (l = 0; l < bs; l++) {
1946             if (PetscRealPart(*aa++) >= 0.) continue;
1947             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1948           }
1949         }
1950       }
1951     }
1952     color = PETSC_DRAW_CYAN;
1953     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1954       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1955         y_l = A->rmap->N - row - 1.0;
1956         y_r = y_l + 1.0;
1957         x_l = a->j[j] * bs;
1958         x_r = x_l + 1.0;
1959         aa  = a->a + j * bs2;
1960         for (k = 0; k < bs; k++) {
1961           for (l = 0; l < bs; l++) {
1962             if (PetscRealPart(*aa++) != 0.) continue;
1963             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1964           }
1965         }
1966       }
1967     }
1968     color = PETSC_DRAW_RED;
1969     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1970       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1971         y_l = A->rmap->N - row - 1.0;
1972         y_r = y_l + 1.0;
1973         x_l = a->j[j] * bs;
1974         x_r = x_l + 1.0;
1975         aa  = a->a + j * bs2;
1976         for (k = 0; k < bs; k++) {
1977           for (l = 0; l < bs; l++) {
1978             if (PetscRealPart(*aa++) <= 0.) continue;
1979             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1980           }
1981         }
1982       }
1983     }
1984     PetscDrawCollectiveEnd(draw);
1985   } else {
1986     /* use contour shading to indicate magnitude of values */
1987     /* first determine max of all nonzero values */
1988     PetscReal minv = 0.0, maxv = 0.0;
1989     PetscDraw popup;
1990 
1991     for (i = 0; i < a->nz * a->bs2; i++) {
1992       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1993     }
1994     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1995     PetscCall(PetscDrawGetPopup(draw, &popup));
1996     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
1997 
1998     PetscDrawCollectiveBegin(draw);
1999     for (i = 0, row = 0; i < mbs; i++, row += bs) {
2000       for (j = a->i[i]; j < a->i[i + 1]; j++) {
2001         y_l = A->rmap->N - row - 1.0;
2002         y_r = y_l + 1.0;
2003         x_l = a->j[j] * bs;
2004         x_r = x_l + 1.0;
2005         aa  = a->a + j * bs2;
2006         for (k = 0; k < bs; k++) {
2007           for (l = 0; l < bs; l++) {
2008             MatScalar v = *aa++;
2009             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
2010             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2011           }
2012         }
2013       }
2014     }
2015     PetscDrawCollectiveEnd(draw);
2016   }
2017   PetscFunctionReturn(PETSC_SUCCESS);
2018 }
2019 
2020 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2021 {
2022   PetscReal xl, yl, xr, yr, w, h;
2023   PetscDraw draw;
2024   PetscBool isnull;
2025 
2026   PetscFunctionBegin;
2027   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
2028   PetscCall(PetscDrawIsNull(draw, &isnull));
2029   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
2030 
2031   xr = A->cmap->n;
2032   yr = A->rmap->N;
2033   h  = yr / 10.0;
2034   w  = xr / 10.0;
2035   xr += w;
2036   yr += h;
2037   xl = -w;
2038   yl = -h;
2039   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
2040   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
2041   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
2042   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
2043   PetscCall(PetscDrawSave(draw));
2044   PetscFunctionReturn(PETSC_SUCCESS);
2045 }
2046 
2047 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2048 {
2049   PetscBool iascii, isbinary, isdraw;
2050 
2051   PetscFunctionBegin;
2052   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2053   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2054   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
2055   if (iascii) {
2056     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
2057   } else if (isbinary) {
2058     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
2059   } else if (isdraw) {
2060     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
2061   } else {
2062     Mat B;
2063     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2064     PetscCall(MatView(B, viewer));
2065     PetscCall(MatDestroy(&B));
2066   }
2067   PetscFunctionReturn(PETSC_SUCCESS);
2068 }
2069 
2070 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2071 {
2072   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2073   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2074   PetscInt    *ai = a->i, *ailen = a->ilen;
2075   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
2076   MatScalar   *ap, *aa = a->a;
2077 
2078   PetscFunctionBegin;
2079   for (k = 0; k < m; k++) { /* loop over rows */
2080     row  = im[k];
2081     brow = row / bs;
2082     if (row < 0) {
2083       v += n;
2084       continue;
2085     } /* negative row */
2086     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2087     rp   = aj ? aj + ai[brow] : NULL;       /* mustn't add to NULL, that is UB */
2088     ap   = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */
2089     nrow = ailen[brow];
2090     for (l = 0; l < n; l++) { /* loop over columns */
2091       if (in[l] < 0) {
2092         v++;
2093         continue;
2094       } /* negative column */
2095       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
2096       col  = in[l];
2097       bcol = col / bs;
2098       cidx = col % bs;
2099       ridx = row % bs;
2100       high = nrow;
2101       low  = 0; /* assume unsorted */
2102       while (high - low > 5) {
2103         t = (low + high) / 2;
2104         if (rp[t] > bcol) high = t;
2105         else low = t;
2106       }
2107       for (i = low; i < high; i++) {
2108         if (rp[i] > bcol) break;
2109         if (rp[i] == bcol) {
2110           *v++ = ap[bs2 * i + bs * cidx + ridx];
2111           goto finished;
2112         }
2113       }
2114       *v++ = 0.0;
2115     finished:;
2116     }
2117   }
2118   PetscFunctionReturn(PETSC_SUCCESS);
2119 }
2120 
2121 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2122 {
2123   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2124   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2125   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2126   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2127   PetscBool          roworiented = a->roworiented;
2128   const PetscScalar *value       = v;
2129   MatScalar         *ap = NULL, *aa = a->a, *bap;
2130 
2131   PetscFunctionBegin;
2132   if (roworiented) {
2133     stepval = (n - 1) * bs;
2134   } else {
2135     stepval = (m - 1) * bs;
2136   }
2137   for (k = 0; k < m; k++) { /* loop over added rows */
2138     row = im[k];
2139     if (row < 0) continue;
2140     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
2141     rp = aj + ai[row];
2142     if (!A->structure_only) ap = aa + bs2 * ai[row];
2143     rmax = imax[row];
2144     nrow = ailen[row];
2145     low  = 0;
2146     high = nrow;
2147     for (l = 0; l < n; l++) { /* loop over added columns */
2148       if (in[l] < 0) continue;
2149       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
2150       col = in[l];
2151       if (!A->structure_only) {
2152         if (roworiented) {
2153           value = v + (k * (stepval + bs) + l) * bs;
2154         } else {
2155           value = v + (l * (stepval + bs) + k) * bs;
2156         }
2157       }
2158       if (col <= lastcol) low = 0;
2159       else high = nrow;
2160       lastcol = col;
2161       while (high - low > 7) {
2162         t = (low + high) / 2;
2163         if (rp[t] > col) high = t;
2164         else low = t;
2165       }
2166       for (i = low; i < high; i++) {
2167         if (rp[i] > col) break;
2168         if (rp[i] == col) {
2169           if (A->structure_only) goto noinsert2;
2170           bap = ap + bs2 * i;
2171           if (roworiented) {
2172             if (is == ADD_VALUES) {
2173               for (ii = 0; ii < bs; ii++, value += stepval) {
2174                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2175               }
2176             } else {
2177               for (ii = 0; ii < bs; ii++, value += stepval) {
2178                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2179               }
2180             }
2181           } else {
2182             if (is == ADD_VALUES) {
2183               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2184                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
2185                 bap += bs;
2186               }
2187             } else {
2188               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2189                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
2190                 bap += bs;
2191               }
2192             }
2193           }
2194           goto noinsert2;
2195         }
2196       }
2197       if (nonew == 1) goto noinsert2;
2198       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2199       if (A->structure_only) {
2200         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
2201       } else {
2202         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2203       }
2204       N = nrow++ - 1;
2205       high++;
2206       /* shift up all the later entries in this row */
2207       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2208       rp[i] = col;
2209       if (!A->structure_only) {
2210         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2211         bap = ap + bs2 * i;
2212         if (roworiented) {
2213           for (ii = 0; ii < bs; ii++, value += stepval) {
2214             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2215           }
2216         } else {
2217           for (ii = 0; ii < bs; ii++, value += stepval) {
2218             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2219           }
2220         }
2221       }
2222     noinsert2:;
2223       low = i;
2224     }
2225     ailen[row] = nrow;
2226   }
2227   PetscFunctionReturn(PETSC_SUCCESS);
2228 }
2229 
2230 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2231 {
2232   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2233   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2234   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2235   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2236   MatScalar   *aa    = a->a, *ap;
2237   PetscReal    ratio = 0.6;
2238 
2239   PetscFunctionBegin;
2240   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
2241 
2242   if (m) rmax = ailen[0];
2243   for (i = 1; i < mbs; i++) {
2244     /* move each row back by the amount of empty slots (fshift) before it*/
2245     fshift += imax[i - 1] - ailen[i - 1];
2246     rmax = PetscMax(rmax, ailen[i]);
2247     if (fshift) {
2248       ip = aj + ai[i];
2249       ap = aa + bs2 * ai[i];
2250       N  = ailen[i];
2251       PetscCall(PetscArraymove(ip - fshift, ip, N));
2252       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2253     }
2254     ai[i] = ai[i - 1] + ailen[i - 1];
2255   }
2256   if (mbs) {
2257     fshift += imax[mbs - 1] - ailen[mbs - 1];
2258     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2259   }
2260 
2261   /* reset ilen and imax for each row */
2262   a->nonzerorowcnt = 0;
2263   if (A->structure_only) {
2264     PetscCall(PetscFree2(a->imax, a->ilen));
2265   } else { /* !A->structure_only */
2266     for (i = 0; i < mbs; i++) {
2267       ailen[i] = imax[i] = ai[i + 1] - ai[i];
2268       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2269     }
2270   }
2271   a->nz = ai[mbs];
2272 
2273   /* diagonals may have moved, so kill the diagonal pointers */
2274   a->idiagvalid = PETSC_FALSE;
2275   if (fshift && a->diag) PetscCall(PetscFree(a->diag));
2276   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
2277   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
2278   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
2279   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
2280 
2281   A->info.mallocs += a->reallocs;
2282   a->reallocs         = 0;
2283   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2284   a->rmax             = rmax;
2285 
2286   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
2287   PetscFunctionReturn(PETSC_SUCCESS);
2288 }
2289 
2290 /*
2291    This function returns an array of flags which indicate the locations of contiguous
2292    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2293    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2294    Assume: sizes should be long enough to hold all the values.
2295 */
2296 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2297 {
2298   PetscInt j = 0;
2299 
2300   PetscFunctionBegin;
2301   for (PetscInt i = 0; i < n; j++) {
2302     PetscInt row = idx[i];
2303     if (row % bs != 0) { /* Not the beginning of a block */
2304       sizes[j] = 1;
2305       i++;
2306     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2307       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2308       i++;
2309     } else { /* Beginning of the block, so check if the complete block exists */
2310       PetscBool flg = PETSC_TRUE;
2311       for (PetscInt k = 1; k < bs; k++) {
2312         if (row + k != idx[i + k]) { /* break in the block */
2313           flg = PETSC_FALSE;
2314           break;
2315         }
2316       }
2317       if (flg) { /* No break in the bs */
2318         sizes[j] = bs;
2319         i += bs;
2320       } else {
2321         sizes[j] = 1;
2322         i++;
2323       }
2324     }
2325   }
2326   *bs_max = j;
2327   PetscFunctionReturn(PETSC_SUCCESS);
2328 }
2329 
2330 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2331 {
2332   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2333   PetscInt           i, j, k, count, *rows;
2334   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
2335   PetscScalar        zero = 0.0;
2336   MatScalar         *aa;
2337   const PetscScalar *xx;
2338   PetscScalar       *bb;
2339 
2340   PetscFunctionBegin;
2341   /* fix right hand side if needed */
2342   if (x && b) {
2343     PetscCall(VecGetArrayRead(x, &xx));
2344     PetscCall(VecGetArray(b, &bb));
2345     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
2346     PetscCall(VecRestoreArrayRead(x, &xx));
2347     PetscCall(VecRestoreArray(b, &bb));
2348   }
2349 
2350   /* Make a copy of the IS and  sort it */
2351   /* allocate memory for rows,sizes */
2352   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2353 
2354   /* copy IS values to rows, and sort them */
2355   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
2356   PetscCall(PetscSortInt(is_n, rows));
2357 
2358   if (baij->keepnonzeropattern) {
2359     for (i = 0; i < is_n; i++) sizes[i] = 1;
2360     bs_max = is_n;
2361   } else {
2362     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2363     A->nonzerostate++;
2364   }
2365 
2366   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2367     row = rows[j];
2368     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2369     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2370     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
2371     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2372       if (diag != (PetscScalar)0.0) {
2373         if (baij->ilen[row / bs] > 0) {
2374           baij->ilen[row / bs]       = 1;
2375           baij->j[baij->i[row / bs]] = row / bs;
2376 
2377           PetscCall(PetscArrayzero(aa, count * bs));
2378         }
2379         /* Now insert all the diagonal values for this bs */
2380         for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES));
2381       } else { /* (diag == 0.0) */
2382         baij->ilen[row / bs] = 0;
2383       }      /* end (diag == 0.0) */
2384     } else { /* (sizes[i] != bs) */
2385       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2386       for (k = 0; k < count; k++) {
2387         aa[0] = zero;
2388         aa += bs;
2389       }
2390       if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES));
2391     }
2392   }
2393 
2394   PetscCall(PetscFree2(rows, sizes));
2395   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2396   PetscFunctionReturn(PETSC_SUCCESS);
2397 }
2398 
2399 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2400 {
2401   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2402   PetscInt           i, j, k, count;
2403   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
2404   PetscScalar        zero = 0.0;
2405   MatScalar         *aa;
2406   const PetscScalar *xx;
2407   PetscScalar       *bb;
2408   PetscBool         *zeroed, vecs = PETSC_FALSE;
2409 
2410   PetscFunctionBegin;
2411   /* fix right hand side if needed */
2412   if (x && b) {
2413     PetscCall(VecGetArrayRead(x, &xx));
2414     PetscCall(VecGetArray(b, &bb));
2415     vecs = PETSC_TRUE;
2416   }
2417 
2418   /* zero the columns */
2419   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
2420   for (i = 0; i < is_n; i++) {
2421     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
2422     zeroed[is_idx[i]] = PETSC_TRUE;
2423   }
2424   for (i = 0; i < A->rmap->N; i++) {
2425     if (!zeroed[i]) {
2426       row = i / bs;
2427       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
2428         for (k = 0; k < bs; k++) {
2429           col = bs * baij->j[j] + k;
2430           if (zeroed[col]) {
2431             aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
2432             if (vecs) bb[i] -= aa[0] * xx[col];
2433             aa[0] = 0.0;
2434           }
2435         }
2436       }
2437     } else if (vecs) bb[i] = diag * xx[i];
2438   }
2439   PetscCall(PetscFree(zeroed));
2440   if (vecs) {
2441     PetscCall(VecRestoreArrayRead(x, &xx));
2442     PetscCall(VecRestoreArray(b, &bb));
2443   }
2444 
2445   /* zero the rows */
2446   for (i = 0; i < is_n; i++) {
2447     row   = is_idx[i];
2448     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2449     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
2450     for (k = 0; k < count; k++) {
2451       aa[0] = zero;
2452       aa += bs;
2453     }
2454     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
2455   }
2456   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2457   PetscFunctionReturn(PETSC_SUCCESS);
2458 }
2459 
2460 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2461 {
2462   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2463   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2464   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2465   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2466   PetscInt     ridx, cidx, bs2                 = a->bs2;
2467   PetscBool    roworiented = a->roworiented;
2468   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
2469 
2470   PetscFunctionBegin;
2471   for (k = 0; k < m; k++) { /* loop over added rows */
2472     row  = im[k];
2473     brow = row / bs;
2474     if (row < 0) continue;
2475     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
2476     rp = aj + ai[brow];
2477     if (!A->structure_only) ap = aa + bs2 * ai[brow];
2478     rmax = imax[brow];
2479     nrow = ailen[brow];
2480     low  = 0;
2481     high = nrow;
2482     for (l = 0; l < n; l++) { /* loop over added columns */
2483       if (in[l] < 0) continue;
2484       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
2485       col  = in[l];
2486       bcol = col / bs;
2487       ridx = row % bs;
2488       cidx = col % bs;
2489       if (!A->structure_only) {
2490         if (roworiented) {
2491           value = v[l + k * n];
2492         } else {
2493           value = v[k + l * m];
2494         }
2495       }
2496       if (col <= lastcol) low = 0;
2497       else high = nrow;
2498       lastcol = col;
2499       while (high - low > 7) {
2500         t = (low + high) / 2;
2501         if (rp[t] > bcol) high = t;
2502         else low = t;
2503       }
2504       for (i = low; i < high; i++) {
2505         if (rp[i] > bcol) break;
2506         if (rp[i] == bcol) {
2507           bap = ap + bs2 * i + bs * cidx + ridx;
2508           if (!A->structure_only) {
2509             if (is == ADD_VALUES) *bap += value;
2510             else *bap = value;
2511           }
2512           goto noinsert1;
2513         }
2514       }
2515       if (nonew == 1) goto noinsert1;
2516       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2517       if (A->structure_only) {
2518         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2519       } else {
2520         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2521       }
2522       N = nrow++ - 1;
2523       high++;
2524       /* shift up all the later entries in this row */
2525       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2526       rp[i] = bcol;
2527       if (!A->structure_only) {
2528         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2529         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2530         ap[bs2 * i + bs * cidx + ridx] = value;
2531       }
2532       a->nz++;
2533       A->nonzerostate++;
2534     noinsert1:;
2535       low = i;
2536     }
2537     ailen[brow] = nrow;
2538   }
2539   PetscFunctionReturn(PETSC_SUCCESS);
2540 }
2541 
2542 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2543 {
2544   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
2545   Mat          outA;
2546   PetscBool    row_identity, col_identity;
2547 
2548   PetscFunctionBegin;
2549   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
2550   PetscCall(ISIdentity(row, &row_identity));
2551   PetscCall(ISIdentity(col, &col_identity));
2552   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
2553 
2554   outA            = inA;
2555   inA->factortype = MAT_FACTOR_LU;
2556   PetscCall(PetscFree(inA->solvertype));
2557   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
2558 
2559   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2560 
2561   PetscCall(PetscObjectReference((PetscObject)row));
2562   PetscCall(ISDestroy(&a->row));
2563   a->row = row;
2564   PetscCall(PetscObjectReference((PetscObject)col));
2565   PetscCall(ISDestroy(&a->col));
2566   a->col = col;
2567 
2568   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2569   PetscCall(ISDestroy(&a->icol));
2570   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2571 
2572   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2573   if (!a->solve_work) { PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); }
2574   PetscCall(MatLUFactorNumeric(outA, inA, info));
2575   PetscFunctionReturn(PETSC_SUCCESS);
2576 }
2577 
2578 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2579 {
2580   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2581 
2582   PetscFunctionBegin;
2583   baij->nz = baij->maxnz;
2584   PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2585   PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
2586   PetscFunctionReturn(PETSC_SUCCESS);
2587 }
2588 
2589 /*@
2590     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix.
2591 
2592   Input Parameters:
2593 +  mat - the `MATSEQBAIJ` matrix
2594 -  indices - the column indices
2595 
2596   Level: advanced
2597 
2598   Notes:
2599     This can be called if you have precomputed the nonzero structure of the
2600   matrix and want to provide it to the matrix object to improve the performance
2601   of the `MatSetValues()` operation.
2602 
2603     You MUST have set the correct numbers of nonzeros per row in the call to
2604   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
2605 
2606     MUST be called before any calls to `MatSetValues()`
2607 
2608 .seealso: `MATSEQBAIJ`, `MatSetValues()`
2609 @*/
2610 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2611 {
2612   PetscFunctionBegin;
2613   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2614   PetscValidIntPointer(indices, 2);
2615   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices));
2616   PetscFunctionReturn(PETSC_SUCCESS);
2617 }
2618 
2619 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2620 {
2621   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2622   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2623   PetscReal    atmp;
2624   PetscScalar *x, zero = 0.0;
2625   MatScalar   *aa;
2626   PetscInt     ncols, brow, krow, kcol;
2627 
2628   PetscFunctionBegin;
2629   /* why is this not a macro???????????????????????????????????????????????????????????????? */
2630   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2631   bs  = A->rmap->bs;
2632   aa  = a->a;
2633   ai  = a->i;
2634   aj  = a->j;
2635   mbs = a->mbs;
2636 
2637   PetscCall(VecSet(v, zero));
2638   PetscCall(VecGetArray(v, &x));
2639   PetscCall(VecGetLocalSize(v, &n));
2640   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2641   for (i = 0; i < mbs; i++) {
2642     ncols = ai[1] - ai[0];
2643     ai++;
2644     brow = bs * i;
2645     for (j = 0; j < ncols; j++) {
2646       for (kcol = 0; kcol < bs; kcol++) {
2647         for (krow = 0; krow < bs; krow++) {
2648           atmp = PetscAbsScalar(*aa);
2649           aa++;
2650           row = brow + krow; /* row index */
2651           if (PetscAbsScalar(x[row]) < atmp) {
2652             x[row] = atmp;
2653             if (idx) idx[row] = bs * (*aj) + kcol;
2654           }
2655         }
2656       }
2657       aj++;
2658     }
2659   }
2660   PetscCall(VecRestoreArray(v, &x));
2661   PetscFunctionReturn(PETSC_SUCCESS);
2662 }
2663 
2664 PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2665 {
2666   PetscFunctionBegin;
2667   /* If the two matrices have the same copy implementation, use fast copy. */
2668   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2669     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
2670     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2671     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
2672 
2673     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
2674     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
2675     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
2676     PetscCall(PetscObjectStateIncrease((PetscObject)B));
2677   } else {
2678     PetscCall(MatCopy_Basic(A, B, str));
2679   }
2680   PetscFunctionReturn(PETSC_SUCCESS);
2681 }
2682 
2683 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2684 {
2685   PetscFunctionBegin;
2686   PetscCall(MatSeqBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL));
2687   PetscFunctionReturn(PETSC_SUCCESS);
2688 }
2689 
2690 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2691 {
2692   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2693 
2694   PetscFunctionBegin;
2695   *array = a->a;
2696   PetscFunctionReturn(PETSC_SUCCESS);
2697 }
2698 
2699 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2700 {
2701   PetscFunctionBegin;
2702   *array = NULL;
2703   PetscFunctionReturn(PETSC_SUCCESS);
2704 }
2705 
2706 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2707 {
2708   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
2709   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
2710   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
2711 
2712   PetscFunctionBegin;
2713   /* Set the number of nonzeros in the new matrix */
2714   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
2715   PetscFunctionReturn(PETSC_SUCCESS);
2716 }
2717 
2718 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2719 {
2720   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
2721   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2722   PetscBLASInt one = 1;
2723 
2724   PetscFunctionBegin;
2725   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2726     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2727     if (e) {
2728       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2729       if (e) {
2730         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2731         if (e) str = SAME_NONZERO_PATTERN;
2732       }
2733     }
2734     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2735   }
2736   if (str == SAME_NONZERO_PATTERN) {
2737     PetscScalar  alpha = a;
2738     PetscBLASInt bnz;
2739     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2740     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
2741     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2742   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2743     PetscCall(MatAXPY_Basic(Y, a, X, str));
2744   } else {
2745     Mat       B;
2746     PetscInt *nnz;
2747     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
2748     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
2749     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2750     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2751     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
2752     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
2753     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
2754     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
2755     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
2756     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2757     PetscCall(MatHeaderMerge(Y, &B));
2758     PetscCall(PetscFree(nnz));
2759   }
2760   PetscFunctionReturn(PETSC_SUCCESS);
2761 }
2762 
2763 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2764 {
2765 #if PetscDefined(USE_COMPLEX)
2766   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2767   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2768   MatScalar   *aa = a->a;
2769 
2770   PetscFunctionBegin;
2771   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
2772   PetscFunctionReturn(PETSC_SUCCESS);
2773 #else
2774   (void)A;
2775   return PETSC_SUCCESS;
2776 #endif
2777 }
2778 
2779 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2780 {
2781 #if PetscDefined(USE_COMPLEX)
2782   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2783   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2784   MatScalar   *aa = a->a;
2785 
2786   PetscFunctionBegin;
2787   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
2788   PetscFunctionReturn(PETSC_SUCCESS);
2789 #else
2790   (void)A;
2791   return PETSC_SUCCESS;
2792 #endif
2793 }
2794 
2795 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2796 {
2797 #if PetscDefined(USE_COMPLEX)
2798   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2799   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2800   MatScalar   *aa = a->a;
2801 
2802   PetscFunctionBegin;
2803   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2804   PetscFunctionReturn(PETSC_SUCCESS);
2805 #else
2806   (void)A;
2807   return PETSC_SUCCESS;
2808 #endif
2809 }
2810 
2811 /*
2812     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2813 */
2814 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2815 {
2816   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
2817   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
2818   PetscInt     nz = a->i[m], row, *jj, mr, col;
2819 
2820   PetscFunctionBegin;
2821   *nn = n;
2822   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2823   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
2824   PetscCall(PetscCalloc1(n, &collengths));
2825   PetscCall(PetscMalloc1(n + 1, &cia));
2826   PetscCall(PetscMalloc1(nz, &cja));
2827   jj = a->j;
2828   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2829   cia[0] = oshift;
2830   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2831   PetscCall(PetscArrayzero(collengths, n));
2832   jj = a->j;
2833   for (row = 0; row < m; row++) {
2834     mr = a->i[row + 1] - a->i[row];
2835     for (i = 0; i < mr; i++) {
2836       col = *jj++;
2837 
2838       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2839     }
2840   }
2841   PetscCall(PetscFree(collengths));
2842   *ia = cia;
2843   *ja = cja;
2844   PetscFunctionReturn(PETSC_SUCCESS);
2845 }
2846 
2847 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2848 {
2849   PetscFunctionBegin;
2850   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2851   PetscCall(PetscFree(*ia));
2852   PetscCall(PetscFree(*ja));
2853   PetscFunctionReturn(PETSC_SUCCESS);
2854 }
2855 
2856 /*
2857  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2858  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2859  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2860  */
2861 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2862 {
2863   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2864   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2865   PetscInt     nz = a->i[m], row, *jj, mr, col;
2866   PetscInt    *cspidx;
2867 
2868   PetscFunctionBegin;
2869   *nn = n;
2870   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2871 
2872   PetscCall(PetscCalloc1(n, &collengths));
2873   PetscCall(PetscMalloc1(n + 1, &cia));
2874   PetscCall(PetscMalloc1(nz, &cja));
2875   PetscCall(PetscMalloc1(nz, &cspidx));
2876   jj = a->j;
2877   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2878   cia[0] = oshift;
2879   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2880   PetscCall(PetscArrayzero(collengths, n));
2881   jj = a->j;
2882   for (row = 0; row < m; row++) {
2883     mr = a->i[row + 1] - a->i[row];
2884     for (i = 0; i < mr; i++) {
2885       col                                         = *jj++;
2886       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2887       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2888     }
2889   }
2890   PetscCall(PetscFree(collengths));
2891   *ia    = cia;
2892   *ja    = cja;
2893   *spidx = cspidx;
2894   PetscFunctionReturn(PETSC_SUCCESS);
2895 }
2896 
2897 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2898 {
2899   PetscFunctionBegin;
2900   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
2901   PetscCall(PetscFree(*spidx));
2902   PetscFunctionReturn(PETSC_SUCCESS);
2903 }
2904 
2905 PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2906 {
2907   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
2908 
2909   PetscFunctionBegin;
2910   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
2911   PetscCall(MatShift_Basic(Y, a));
2912   PetscFunctionReturn(PETSC_SUCCESS);
2913 }
2914 
2915 /* -------------------------------------------------------------------*/
2916 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2917                                        MatGetRow_SeqBAIJ,
2918                                        MatRestoreRow_SeqBAIJ,
2919                                        MatMult_SeqBAIJ_N,
2920                                        /* 4*/ MatMultAdd_SeqBAIJ_N,
2921                                        MatMultTranspose_SeqBAIJ,
2922                                        MatMultTransposeAdd_SeqBAIJ,
2923                                        NULL,
2924                                        NULL,
2925                                        NULL,
2926                                        /* 10*/ NULL,
2927                                        MatLUFactor_SeqBAIJ,
2928                                        NULL,
2929                                        NULL,
2930                                        MatTranspose_SeqBAIJ,
2931                                        /* 15*/ MatGetInfo_SeqBAIJ,
2932                                        MatEqual_SeqBAIJ,
2933                                        MatGetDiagonal_SeqBAIJ,
2934                                        MatDiagonalScale_SeqBAIJ,
2935                                        MatNorm_SeqBAIJ,
2936                                        /* 20*/ NULL,
2937                                        MatAssemblyEnd_SeqBAIJ,
2938                                        MatSetOption_SeqBAIJ,
2939                                        MatZeroEntries_SeqBAIJ,
2940                                        /* 24*/ MatZeroRows_SeqBAIJ,
2941                                        NULL,
2942                                        NULL,
2943                                        NULL,
2944                                        NULL,
2945                                        /* 29*/ MatSetUp_SeqBAIJ,
2946                                        NULL,
2947                                        NULL,
2948                                        NULL,
2949                                        NULL,
2950                                        /* 34*/ MatDuplicate_SeqBAIJ,
2951                                        NULL,
2952                                        NULL,
2953                                        MatILUFactor_SeqBAIJ,
2954                                        NULL,
2955                                        /* 39*/ MatAXPY_SeqBAIJ,
2956                                        MatCreateSubMatrices_SeqBAIJ,
2957                                        MatIncreaseOverlap_SeqBAIJ,
2958                                        MatGetValues_SeqBAIJ,
2959                                        MatCopy_SeqBAIJ,
2960                                        /* 44*/ NULL,
2961                                        MatScale_SeqBAIJ,
2962                                        MatShift_SeqBAIJ,
2963                                        NULL,
2964                                        MatZeroRowsColumns_SeqBAIJ,
2965                                        /* 49*/ NULL,
2966                                        MatGetRowIJ_SeqBAIJ,
2967                                        MatRestoreRowIJ_SeqBAIJ,
2968                                        MatGetColumnIJ_SeqBAIJ,
2969                                        MatRestoreColumnIJ_SeqBAIJ,
2970                                        /* 54*/ MatFDColoringCreate_SeqXAIJ,
2971                                        NULL,
2972                                        NULL,
2973                                        NULL,
2974                                        MatSetValuesBlocked_SeqBAIJ,
2975                                        /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2976                                        MatDestroy_SeqBAIJ,
2977                                        MatView_SeqBAIJ,
2978                                        NULL,
2979                                        NULL,
2980                                        /* 64*/ NULL,
2981                                        NULL,
2982                                        NULL,
2983                                        NULL,
2984                                        NULL,
2985                                        /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2986                                        NULL,
2987                                        MatConvert_Basic,
2988                                        NULL,
2989                                        NULL,
2990                                        /* 74*/ NULL,
2991                                        MatFDColoringApply_BAIJ,
2992                                        NULL,
2993                                        NULL,
2994                                        NULL,
2995                                        /* 79*/ NULL,
2996                                        NULL,
2997                                        NULL,
2998                                        NULL,
2999                                        MatLoad_SeqBAIJ,
3000                                        /* 84*/ NULL,
3001                                        NULL,
3002                                        NULL,
3003                                        NULL,
3004                                        NULL,
3005                                        /* 89*/ NULL,
3006                                        NULL,
3007                                        NULL,
3008                                        NULL,
3009                                        NULL,
3010                                        /* 94*/ NULL,
3011                                        NULL,
3012                                        NULL,
3013                                        NULL,
3014                                        NULL,
3015                                        /* 99*/ NULL,
3016                                        NULL,
3017                                        NULL,
3018                                        MatConjugate_SeqBAIJ,
3019                                        NULL,
3020                                        /*104*/ NULL,
3021                                        MatRealPart_SeqBAIJ,
3022                                        MatImaginaryPart_SeqBAIJ,
3023                                        NULL,
3024                                        NULL,
3025                                        /*109*/ NULL,
3026                                        NULL,
3027                                        NULL,
3028                                        NULL,
3029                                        MatMissingDiagonal_SeqBAIJ,
3030                                        /*114*/ NULL,
3031                                        NULL,
3032                                        NULL,
3033                                        NULL,
3034                                        NULL,
3035                                        /*119*/ NULL,
3036                                        NULL,
3037                                        MatMultHermitianTranspose_SeqBAIJ,
3038                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3039                                        NULL,
3040                                        /*124*/ NULL,
3041                                        MatGetColumnReductions_SeqBAIJ,
3042                                        MatInvertBlockDiagonal_SeqBAIJ,
3043                                        NULL,
3044                                        NULL,
3045                                        /*129*/ NULL,
3046                                        NULL,
3047                                        NULL,
3048                                        NULL,
3049                                        NULL,
3050                                        /*134*/ NULL,
3051                                        NULL,
3052                                        NULL,
3053                                        NULL,
3054                                        NULL,
3055                                        /*139*/ MatSetBlockSizes_Default,
3056                                        NULL,
3057                                        NULL,
3058                                        MatFDColoringSetUp_SeqXAIJ,
3059                                        NULL,
3060                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3061                                        MatDestroySubMatrices_SeqBAIJ,
3062                                        NULL,
3063                                        NULL,
3064                                        NULL,
3065                                        NULL,
3066                                        /*150*/ NULL,
3067                                        NULL};
3068 
3069 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3070 {
3071   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3072   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3073 
3074   PetscFunctionBegin;
3075   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3076 
3077   /* allocate space for values if not already there */
3078   if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
3079 
3080   /* copy values over */
3081   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
3082   PetscFunctionReturn(PETSC_SUCCESS);
3083 }
3084 
3085 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3086 {
3087   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3088   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3089 
3090   PetscFunctionBegin;
3091   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3092   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
3093 
3094   /* copy values over */
3095   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
3096   PetscFunctionReturn(PETSC_SUCCESS);
3097 }
3098 
3099 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3100 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3101 
3102 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz)
3103 {
3104   Mat_SeqBAIJ *b;
3105   PetscInt     i, mbs, nbs, bs2;
3106   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3107 
3108   PetscFunctionBegin;
3109   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3110   if (nz == MAT_SKIP_ALLOCATION) {
3111     skipallocation = PETSC_TRUE;
3112     nz             = 0;
3113   }
3114 
3115   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
3116   PetscCall(PetscLayoutSetUp(B->rmap));
3117   PetscCall(PetscLayoutSetUp(B->cmap));
3118   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3119 
3120   B->preallocated = PETSC_TRUE;
3121 
3122   mbs = B->rmap->n / bs;
3123   nbs = B->cmap->n / bs;
3124   bs2 = bs * bs;
3125 
3126   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3127 
3128   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3129   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3130   if (nnz) {
3131     for (i = 0; i < mbs; i++) {
3132       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
3133       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3134     }
3135   }
3136 
3137   b = (Mat_SeqBAIJ *)B->data;
3138   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
3139   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3140   PetscOptionsEnd();
3141 
3142   if (!flg) {
3143     switch (bs) {
3144     case 1:
3145       B->ops->mult    = MatMult_SeqBAIJ_1;
3146       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3147       break;
3148     case 2:
3149       B->ops->mult    = MatMult_SeqBAIJ_2;
3150       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3151       break;
3152     case 3:
3153       B->ops->mult    = MatMult_SeqBAIJ_3;
3154       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3155       break;
3156     case 4:
3157       B->ops->mult    = MatMult_SeqBAIJ_4;
3158       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3159       break;
3160     case 5:
3161       B->ops->mult    = MatMult_SeqBAIJ_5;
3162       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3163       break;
3164     case 6:
3165       B->ops->mult    = MatMult_SeqBAIJ_6;
3166       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3167       break;
3168     case 7:
3169       B->ops->mult    = MatMult_SeqBAIJ_7;
3170       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3171       break;
3172     case 9: {
3173       PetscInt version = 1;
3174       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3175       switch (version) {
3176 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3177       case 1:
3178         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
3179         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3180         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3181         break;
3182 #endif
3183       default:
3184         B->ops->mult    = MatMult_SeqBAIJ_N;
3185         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3186         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3187         break;
3188       }
3189       break;
3190     }
3191     case 11:
3192       B->ops->mult    = MatMult_SeqBAIJ_11;
3193       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3194       break;
3195     case 12: {
3196       PetscInt version = 1;
3197       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3198       switch (version) {
3199       case 1:
3200         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
3201         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3202         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3203         break;
3204       case 2:
3205         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
3206         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3207         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3208         break;
3209 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3210       case 3:
3211         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
3212         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3213         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3214         break;
3215 #endif
3216       default:
3217         B->ops->mult    = MatMult_SeqBAIJ_N;
3218         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3219         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3220         break;
3221       }
3222       break;
3223     }
3224     case 15: {
3225       PetscInt version = 1;
3226       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3227       switch (version) {
3228       case 1:
3229         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3230         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3231         break;
3232       case 2:
3233         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3234         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3235         break;
3236       case 3:
3237         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3238         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3239         break;
3240       case 4:
3241         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3242         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3243         break;
3244       default:
3245         B->ops->mult = MatMult_SeqBAIJ_N;
3246         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3247         break;
3248       }
3249       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3250       break;
3251     }
3252     default:
3253       B->ops->mult    = MatMult_SeqBAIJ_N;
3254       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3255       PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3256       break;
3257     }
3258   }
3259   B->ops->sor = MatSOR_SeqBAIJ;
3260   b->mbs      = mbs;
3261   b->nbs      = nbs;
3262   if (!skipallocation) {
3263     if (!b->imax) {
3264       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
3265 
3266       b->free_imax_ilen = PETSC_TRUE;
3267     }
3268     /* b->ilen will count nonzeros in each block row so far. */
3269     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3270     if (!nnz) {
3271       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3272       else if (nz < 0) nz = 1;
3273       nz = PetscMin(nz, nbs);
3274       for (i = 0; i < mbs; i++) b->imax[i] = nz;
3275       PetscCall(PetscIntMultError(nz, mbs, &nz));
3276     } else {
3277       PetscInt64 nz64 = 0;
3278       for (i = 0; i < mbs; i++) {
3279         b->imax[i] = nnz[i];
3280         nz64 += nnz[i];
3281       }
3282       PetscCall(PetscIntCast(nz64, &nz));
3283     }
3284 
3285     /* allocate the matrix space */
3286     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3287     if (B->structure_only) {
3288       PetscCall(PetscMalloc1(nz, &b->j));
3289       PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i));
3290     } else {
3291       PetscInt nzbs2 = 0;
3292       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
3293       PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i));
3294       PetscCall(PetscArrayzero(b->a, nz * bs2));
3295     }
3296     PetscCall(PetscArrayzero(b->j, nz));
3297 
3298     if (B->structure_only) {
3299       b->singlemalloc = PETSC_FALSE;
3300       b->free_a       = PETSC_FALSE;
3301     } else {
3302       b->singlemalloc = PETSC_TRUE;
3303       b->free_a       = PETSC_TRUE;
3304     }
3305     b->free_ij = PETSC_TRUE;
3306 
3307     b->i[0] = 0;
3308     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3309 
3310   } else {
3311     b->free_a  = PETSC_FALSE;
3312     b->free_ij = PETSC_FALSE;
3313   }
3314 
3315   b->bs2              = bs2;
3316   b->mbs              = mbs;
3317   b->nz               = 0;
3318   b->maxnz            = nz;
3319   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3320   B->was_assembled    = PETSC_FALSE;
3321   B->assembled        = PETSC_FALSE;
3322   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3323   PetscFunctionReturn(PETSC_SUCCESS);
3324 }
3325 
3326 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3327 {
3328   PetscInt     i, m, nz, nz_max = 0, *nnz;
3329   PetscScalar *values      = NULL;
3330   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3331 
3332   PetscFunctionBegin;
3333   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
3334   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
3335   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
3336   PetscCall(PetscLayoutSetUp(B->rmap));
3337   PetscCall(PetscLayoutSetUp(B->cmap));
3338   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3339   m = B->rmap->n / bs;
3340 
3341   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
3342   PetscCall(PetscMalloc1(m + 1, &nnz));
3343   for (i = 0; i < m; i++) {
3344     nz = ii[i + 1] - ii[i];
3345     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3346     nz_max = PetscMax(nz_max, nz);
3347     nnz[i] = nz;
3348   }
3349   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
3350   PetscCall(PetscFree(nnz));
3351 
3352   values = (PetscScalar *)V;
3353   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3354   for (i = 0; i < m; i++) {
3355     PetscInt        ncols = ii[i + 1] - ii[i];
3356     const PetscInt *icols = jj + ii[i];
3357     if (bs == 1 || !roworiented) {
3358       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
3359       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
3360     } else {
3361       PetscInt j;
3362       for (j = 0; j < ncols; j++) {
3363         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
3364         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
3365       }
3366     }
3367   }
3368   if (!V) PetscCall(PetscFree(values));
3369   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3370   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3371   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3372   PetscFunctionReturn(PETSC_SUCCESS);
3373 }
3374 
3375 /*@C
3376    MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3377 
3378    Not Collective
3379 
3380    Input Parameter:
3381 .  mat - a `MATSEQBAIJ` matrix
3382 
3383    Output Parameter:
3384 .   array - pointer to the data
3385 
3386    Level: intermediate
3387 
3388 .seealso: `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3389 @*/
3390 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array)
3391 {
3392   PetscFunctionBegin;
3393   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
3394   PetscFunctionReturn(PETSC_SUCCESS);
3395 }
3396 
3397 /*@C
3398    MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3399 
3400    Not Collective
3401 
3402    Input Parameters:
3403 +  mat - a `MATSEQBAIJ` matrix
3404 -  array - pointer to the data
3405 
3406    Level: intermediate
3407 
3408 .seealso: `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3409 @*/
3410 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array)
3411 {
3412   PetscFunctionBegin;
3413   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
3414   PetscFunctionReturn(PETSC_SUCCESS);
3415 }
3416 
3417 /*MC
3418    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3419    block sparse compressed row format.
3420 
3421    Options Database Keys:
3422 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3423 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3424 
3425    Level: beginner
3426 
3427    Notes:
3428     `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
3429     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
3430 
3431    Run with -info to see what version of the matrix-vector product is being used
3432 
3433 .seealso: `MatCreateSeqBAIJ()`
3434 M*/
3435 
3436 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3437 
3438 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3439 {
3440   PetscMPIInt  size;
3441   Mat_SeqBAIJ *b;
3442 
3443   PetscFunctionBegin;
3444   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
3445   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3446 
3447   PetscCall(PetscNew(&b));
3448   B->data = (void *)b;
3449   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
3450 
3451   b->row          = NULL;
3452   b->col          = NULL;
3453   b->icol         = NULL;
3454   b->reallocs     = 0;
3455   b->saved_values = NULL;
3456 
3457   b->roworiented        = PETSC_TRUE;
3458   b->nonew              = 0;
3459   b->diag               = NULL;
3460   B->spptr              = NULL;
3461   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3462   b->keepnonzeropattern = PETSC_FALSE;
3463 
3464   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
3465   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
3466   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
3467   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
3468   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
3469   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
3470   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
3471   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
3472   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3473   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
3474 #if defined(PETSC_HAVE_HYPRE)
3475   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
3476 #endif
3477   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
3478   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
3479   PetscFunctionReturn(PETSC_SUCCESS);
3480 }
3481 
3482 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3483 {
3484   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3485   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3486 
3487   PetscFunctionBegin;
3488   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
3489 
3490   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3491     c->imax           = a->imax;
3492     c->ilen           = a->ilen;
3493     c->free_imax_ilen = PETSC_FALSE;
3494   } else {
3495     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3496     for (i = 0; i < mbs; i++) {
3497       c->imax[i] = a->imax[i];
3498       c->ilen[i] = a->ilen[i];
3499     }
3500     c->free_imax_ilen = PETSC_TRUE;
3501   }
3502 
3503   /* allocate the matrix space */
3504   if (mallocmatspace) {
3505     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3506       PetscCall(PetscCalloc1(bs2 * nz, &c->a));
3507 
3508       c->i            = a->i;
3509       c->j            = a->j;
3510       c->singlemalloc = PETSC_FALSE;
3511       c->free_a       = PETSC_TRUE;
3512       c->free_ij      = PETSC_FALSE;
3513       c->parent       = A;
3514       C->preallocated = PETSC_TRUE;
3515       C->assembled    = PETSC_TRUE;
3516 
3517       PetscCall(PetscObjectReference((PetscObject)A));
3518       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3519       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3520     } else {
3521       PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i));
3522 
3523       c->singlemalloc = PETSC_TRUE;
3524       c->free_a       = PETSC_TRUE;
3525       c->free_ij      = PETSC_TRUE;
3526 
3527       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3528       if (mbs > 0) {
3529         PetscCall(PetscArraycpy(c->j, a->j, nz));
3530         if (cpvalues == MAT_COPY_VALUES) {
3531           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
3532         } else {
3533           PetscCall(PetscArrayzero(c->a, bs2 * nz));
3534         }
3535       }
3536       C->preallocated = PETSC_TRUE;
3537       C->assembled    = PETSC_TRUE;
3538     }
3539   }
3540 
3541   c->roworiented = a->roworiented;
3542   c->nonew       = a->nonew;
3543 
3544   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
3545   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
3546 
3547   c->bs2 = a->bs2;
3548   c->mbs = a->mbs;
3549   c->nbs = a->nbs;
3550 
3551   if (a->diag) {
3552     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3553       c->diag      = a->diag;
3554       c->free_diag = PETSC_FALSE;
3555     } else {
3556       PetscCall(PetscMalloc1(mbs + 1, &c->diag));
3557       for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i];
3558       c->free_diag = PETSC_TRUE;
3559     }
3560   } else c->diag = NULL;
3561 
3562   c->nz         = a->nz;
3563   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3564   c->solve_work = NULL;
3565   c->mult_work  = NULL;
3566   c->sor_workt  = NULL;
3567   c->sor_work   = NULL;
3568 
3569   c->compressedrow.use   = a->compressedrow.use;
3570   c->compressedrow.nrows = a->compressedrow.nrows;
3571   if (a->compressedrow.use) {
3572     i = a->compressedrow.nrows;
3573     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
3574     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
3575     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
3576   } else {
3577     c->compressedrow.use    = PETSC_FALSE;
3578     c->compressedrow.i      = NULL;
3579     c->compressedrow.rindex = NULL;
3580   }
3581   C->nonzerostate = A->nonzerostate;
3582 
3583   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
3584   PetscFunctionReturn(PETSC_SUCCESS);
3585 }
3586 
3587 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3588 {
3589   PetscFunctionBegin;
3590   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
3591   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
3592   PetscCall(MatSetType(*B, MATSEQBAIJ));
3593   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
3594   PetscFunctionReturn(PETSC_SUCCESS);
3595 }
3596 
3597 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3598 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3599 {
3600   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3601   PetscInt    *rowidxs, *colidxs;
3602   PetscScalar *matvals;
3603 
3604   PetscFunctionBegin;
3605   PetscCall(PetscViewerSetUp(viewer));
3606 
3607   /* read matrix header */
3608   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3609   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3610   M  = header[1];
3611   N  = header[2];
3612   nz = header[3];
3613   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3614   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3615   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3616 
3617   /* set block sizes from the viewer's .info file */
3618   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3619   /* set local and global sizes if not set already */
3620   if (mat->rmap->n < 0) mat->rmap->n = M;
3621   if (mat->cmap->n < 0) mat->cmap->n = N;
3622   if (mat->rmap->N < 0) mat->rmap->N = M;
3623   if (mat->cmap->N < 0) mat->cmap->N = N;
3624   PetscCall(PetscLayoutSetUp(mat->rmap));
3625   PetscCall(PetscLayoutSetUp(mat->cmap));
3626 
3627   /* check if the matrix sizes are correct */
3628   PetscCall(MatGetSize(mat, &rows, &cols));
3629   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3630   PetscCall(MatGetBlockSize(mat, &bs));
3631   PetscCall(MatGetLocalSize(mat, &m, &n));
3632   mbs = m / bs;
3633   nbs = n / bs;
3634 
3635   /* read in row lengths, column indices and nonzero values */
3636   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3637   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
3638   rowidxs[0] = 0;
3639   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3640   sum = rowidxs[m];
3641   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3642 
3643   /* read in column indices and nonzero values */
3644   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
3645   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
3646   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3647 
3648   {               /* preallocate matrix storage */
3649     PetscBT   bt; /* helper bit set to count nonzeros */
3650     PetscInt *nnz;
3651     PetscBool sbaij;
3652 
3653     PetscCall(PetscBTCreate(nbs, &bt));
3654     PetscCall(PetscCalloc1(mbs, &nnz));
3655     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3656     for (i = 0; i < mbs; i++) {
3657       PetscCall(PetscBTMemzero(nbs, bt));
3658       for (k = 0; k < bs; k++) {
3659         PetscInt row = bs * i + k;
3660         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3661           PetscInt col = colidxs[j];
3662           if (!sbaij || col >= row)
3663             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3664         }
3665       }
3666     }
3667     PetscCall(PetscBTDestroy(&bt));
3668     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
3669     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
3670     PetscCall(PetscFree(nnz));
3671   }
3672 
3673   /* store matrix values */
3674   for (i = 0; i < m; i++) {
3675     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
3676     PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
3677   }
3678 
3679   PetscCall(PetscFree(rowidxs));
3680   PetscCall(PetscFree2(colidxs, matvals));
3681   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3682   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3683   PetscFunctionReturn(PETSC_SUCCESS);
3684 }
3685 
3686 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3687 {
3688   PetscBool isbinary;
3689 
3690   PetscFunctionBegin;
3691   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3692   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3693   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
3694   PetscFunctionReturn(PETSC_SUCCESS);
3695 }
3696 
3697 /*@C
3698    MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3699    compressed row) format.  For good matrix assembly performance the
3700    user should preallocate the matrix storage by setting the parameter nz
3701    (or the array nnz).  By setting these parameters accurately, performance
3702    during matrix assembly can be increased by more than a factor of 50.
3703 
3704    Collective
3705 
3706    Input Parameters:
3707 +  comm - MPI communicator, set to `PETSC_COMM_SELF`
3708 .  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3709           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3710 .  m - number of rows
3711 .  n - number of columns
3712 .  nz - number of nonzero blocks  per block row (same for all rows)
3713 -  nnz - array containing the number of nonzero blocks in the various block rows
3714          (possibly different for each block row) or NULL
3715 
3716    Output Parameter:
3717 .  A - the matrix
3718 
3719    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3720    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3721    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
3722 
3723    Options Database Keys:
3724 +   -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3725 -   -mat_block_size - size of the blocks to use
3726 
3727    Level: intermediate
3728 
3729    Notes:
3730    The number of rows and columns must be divisible by blocksize.
3731 
3732    If the nnz parameter is given then the nz parameter is ignored
3733 
3734    A nonzero block is any block that as 1 or more nonzeros in it
3735 
3736    The `MATSEQBAIJ` format is fully compatible with standard Fortran 77
3737    storage.  That is, the stored row and column indices can begin at
3738    either one (as in Fortran) or zero.  See the users' manual for details.
3739 
3740    Specify the preallocated storage with either nz or nnz (not both).
3741    Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
3742    allocation.  See [Sparse Matrices](sec_matsparse) for details.
3743    matrices.
3744 
3745 .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3746 @*/
3747 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3748 {
3749   PetscFunctionBegin;
3750   PetscCall(MatCreate(comm, A));
3751   PetscCall(MatSetSizes(*A, m, n, m, n));
3752   PetscCall(MatSetType(*A, MATSEQBAIJ));
3753   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
3754   PetscFunctionReturn(PETSC_SUCCESS);
3755 }
3756 
3757 /*@C
3758    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3759    per row in the matrix. For good matrix assembly performance the
3760    user should preallocate the matrix storage by setting the parameter nz
3761    (or the array nnz).  By setting these parameters accurately, performance
3762    during matrix assembly can be increased by more than a factor of 50.
3763 
3764    Collective
3765 
3766    Input Parameters:
3767 +  B - the matrix
3768 .  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3769           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3770 .  nz - number of block nonzeros per block row (same for all rows)
3771 -  nnz - array containing the number of block nonzeros in the various block rows
3772          (possibly different for each block row) or NULL
3773 
3774    Options Database Keys:
3775 +   -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3776 -   -mat_block_size - size of the blocks to use
3777 
3778    Level: intermediate
3779 
3780    Notes:
3781    If the nnz parameter is given then the nz parameter is ignored
3782 
3783    You can call `MatGetInfo()` to get information on how effective the preallocation was;
3784    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3785    You can also run with the option -info and look for messages with the string
3786    malloc in them to see if additional memory allocation was needed.
3787 
3788    The `MATSEQBAIJ` format is fully compatible with standard Fortran 77
3789    storage.  That is, the stored row and column indices can begin at
3790    either one (as in Fortran) or zero.  See the users' manual for details.
3791 
3792    Specify the preallocated storage with either nz or nnz (not both).
3793    Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
3794    allocation.  See [Sparse Matrices](sec_matsparse) for details.
3795 
3796 .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3797 @*/
3798 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3799 {
3800   PetscFunctionBegin;
3801   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3802   PetscValidType(B, 1);
3803   PetscValidLogicalCollectiveInt(B, bs, 2);
3804   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
3805   PetscFunctionReturn(PETSC_SUCCESS);
3806 }
3807 
3808 /*@C
3809    MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3810 
3811    Collective
3812 
3813    Input Parameters:
3814 +  B - the matrix
3815 .  i - the indices into j for the start of each local row (starts with zero)
3816 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3817 -  v - optional values in the matrix
3818 
3819    Level: advanced
3820 
3821    Notes:
3822    The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
3823    may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
3824    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3825    `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3826    block column and the second index is over columns within a block.
3827 
3828    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3829 
3830 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3831 @*/
3832 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3833 {
3834   PetscFunctionBegin;
3835   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3836   PetscValidType(B, 1);
3837   PetscValidLogicalCollectiveInt(B, bs, 2);
3838   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
3839   PetscFunctionReturn(PETSC_SUCCESS);
3840 }
3841 
3842 /*@
3843      MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3844 
3845      Collective
3846 
3847    Input Parameters:
3848 +  comm - must be an MPI communicator of size 1
3849 .  bs - size of block
3850 .  m - number of rows
3851 .  n - number of columns
3852 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3853 .  j - column indices
3854 -  a - matrix values
3855 
3856    Output Parameter:
3857 .  mat - the matrix
3858 
3859    Level: advanced
3860 
3861    Notes:
3862        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3863     once the matrix is destroyed
3864 
3865        You cannot set new nonzero locations into this matrix, that will generate an error.
3866 
3867        The i and j indices are 0 based
3868 
3869        When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3870 
3871       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3872       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3873       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3874       with column-major ordering within blocks.
3875 
3876 .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3877 @*/
3878 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3879 {
3880   Mat_SeqBAIJ *baij;
3881 
3882   PetscFunctionBegin;
3883   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
3884   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3885 
3886   PetscCall(MatCreate(comm, mat));
3887   PetscCall(MatSetSizes(*mat, m, n, m, n));
3888   PetscCall(MatSetType(*mat, MATSEQBAIJ));
3889   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3890   baij = (Mat_SeqBAIJ *)(*mat)->data;
3891   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3892 
3893   baij->i = i;
3894   baij->j = j;
3895   baij->a = a;
3896 
3897   baij->singlemalloc   = PETSC_FALSE;
3898   baij->nonew          = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3899   baij->free_a         = PETSC_FALSE;
3900   baij->free_ij        = PETSC_FALSE;
3901   baij->free_imax_ilen = PETSC_TRUE;
3902 
3903   for (PetscInt ii = 0; ii < m; ii++) {
3904     const PetscInt row_len = i[ii + 1] - i[ii];
3905 
3906     baij->ilen[ii] = baij->imax[ii] = row_len;
3907     PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
3908   }
3909   if (PetscDefined(USE_DEBUG)) {
3910     for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
3911       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3912       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3913     }
3914   }
3915 
3916   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3917   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3918   PetscFunctionReturn(PETSC_SUCCESS);
3919 }
3920 
3921 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3922 {
3923   PetscFunctionBegin;
3924   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
3925   PetscFunctionReturn(PETSC_SUCCESS);
3926 }
3927