xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 7f296bb328fcd4c99f2da7bfe8ba7ed8a4ebceee)
1 /*
2     Defines the basic matrix operations for the BAIJ (compressed row)
3   matrix storage format.
4 */
5 #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
6 #include <petscblaslapack.h>
7 #include <petsc/private/kernels/blockinvert.h>
8 #include <petsc/private/kernels/blockmatmult.h>
9 
10 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */
11 #define TYPE BAIJ
12 #define TYPE_BS
13 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
14 #undef TYPE_BS
15 #define TYPE_BS _BS
16 #define TYPE_BS_ON
17 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
18 #undef TYPE_BS
19 #include "../src/mat/impls/aij/seq/seqhashmat.h"
20 #undef TYPE
21 #undef TYPE_BS_ON
22 
23 #if defined(PETSC_HAVE_HYPRE)
24 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
25 #endif
26 
27 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
28 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
29 #endif
30 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
31 
32 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
33 {
34   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
35   PetscInt     m, n, ib, jb, bs = A->rmap->bs;
36   MatScalar   *a_val = a_aij->a;
37 
38   PetscFunctionBegin;
39   PetscCall(MatGetSize(A, &m, &n));
40   PetscCall(PetscArrayzero(reductions, n));
41   if (type == NORM_2) {
42     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
43       for (jb = 0; jb < bs; jb++) {
44         for (ib = 0; ib < bs; ib++) {
45           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
46           a_val++;
47         }
48       }
49     }
50   } else if (type == NORM_1) {
51     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
52       for (jb = 0; jb < bs; jb++) {
53         for (ib = 0; ib < bs; ib++) {
54           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
55           a_val++;
56         }
57       }
58     }
59   } else if (type == NORM_INFINITY) {
60     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
61       for (jb = 0; jb < bs; jb++) {
62         for (ib = 0; ib < bs; ib++) {
63           PetscInt col    = A->cmap->rstart + a_aij->j[i] * bs + jb;
64           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
65           a_val++;
66         }
67       }
68     }
69   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
70     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
71       for (jb = 0; jb < bs; jb++) {
72         for (ib = 0; ib < bs; ib++) {
73           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
74           a_val++;
75         }
76       }
77     }
78   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
79     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
80       for (jb = 0; jb < bs; jb++) {
81         for (ib = 0; ib < bs; ib++) {
82           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
83           a_val++;
84         }
85       }
86     }
87   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
88   if (type == NORM_2) {
89     for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
90   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
91     for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
92   }
93   PetscFunctionReturn(PETSC_SUCCESS);
94 }
95 
96 static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
97 {
98   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
99   PetscInt    *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
100   MatScalar   *v     = a->a, *odiag, *diag, work[25], *v_work;
101   PetscReal    shift = 0.0;
102   PetscBool    allowzeropivot, zeropivotdetected = PETSC_FALSE;
103 
104   PetscFunctionBegin;
105   allowzeropivot = PetscNot(A->erroriffailure);
106 
107   if (a->idiagvalid) {
108     if (values) *values = a->idiag;
109     PetscFunctionReturn(PETSC_SUCCESS);
110   }
111   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
112   diag_offset = a->diag;
113   if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); }
114   diag = a->idiag;
115   if (values) *values = a->idiag;
116   /* factor and invert each block */
117   switch (bs) {
118   case 1:
119     for (i = 0; i < mbs; i++) {
120       odiag   = v + 1 * diag_offset[i];
121       diag[0] = odiag[0];
122 
123       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
124         if (allowzeropivot) {
125           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
126           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
127           A->factorerror_zeropivot_row   = i;
128           PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
129         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
130       }
131 
132       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
133       diag += 1;
134     }
135     break;
136   case 2:
137     for (i = 0; i < mbs; i++) {
138       odiag   = v + 4 * diag_offset[i];
139       diag[0] = odiag[0];
140       diag[1] = odiag[1];
141       diag[2] = odiag[2];
142       diag[3] = odiag[3];
143       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
144       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
145       diag += 4;
146     }
147     break;
148   case 3:
149     for (i = 0; i < mbs; i++) {
150       odiag   = v + 9 * diag_offset[i];
151       diag[0] = odiag[0];
152       diag[1] = odiag[1];
153       diag[2] = odiag[2];
154       diag[3] = odiag[3];
155       diag[4] = odiag[4];
156       diag[5] = odiag[5];
157       diag[6] = odiag[6];
158       diag[7] = odiag[7];
159       diag[8] = odiag[8];
160       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
161       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
162       diag += 9;
163     }
164     break;
165   case 4:
166     for (i = 0; i < mbs; i++) {
167       odiag = v + 16 * diag_offset[i];
168       PetscCall(PetscArraycpy(diag, odiag, 16));
169       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
170       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
171       diag += 16;
172     }
173     break;
174   case 5:
175     for (i = 0; i < mbs; i++) {
176       odiag = v + 25 * diag_offset[i];
177       PetscCall(PetscArraycpy(diag, odiag, 25));
178       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
179       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
180       diag += 25;
181     }
182     break;
183   case 6:
184     for (i = 0; i < mbs; i++) {
185       odiag = v + 36 * diag_offset[i];
186       PetscCall(PetscArraycpy(diag, odiag, 36));
187       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
188       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
189       diag += 36;
190     }
191     break;
192   case 7:
193     for (i = 0; i < mbs; i++) {
194       odiag = v + 49 * diag_offset[i];
195       PetscCall(PetscArraycpy(diag, odiag, 49));
196       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
197       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
198       diag += 49;
199     }
200     break;
201   default:
202     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
203     for (i = 0; i < mbs; i++) {
204       odiag = v + bs2 * diag_offset[i];
205       PetscCall(PetscArraycpy(diag, odiag, bs2));
206       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
207       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
208       diag += bs2;
209     }
210     PetscCall(PetscFree2(v_work, v_pivots));
211   }
212   a->idiagvalid = PETSC_TRUE;
213   PetscFunctionReturn(PETSC_SUCCESS);
214 }
215 
216 static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
217 {
218   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
219   PetscScalar       *x, *work, *w, *workt, *t;
220   const MatScalar   *v, *aa = a->a, *idiag;
221   const PetscScalar *b, *xb;
222   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
223   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
224   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
225 
226   PetscFunctionBegin;
227   its = its * lits;
228   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
229   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
230   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
231   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
232   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
233 
234   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
235 
236   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
237   diag  = a->diag;
238   idiag = a->idiag;
239   k     = PetscMax(A->rmap->n, A->cmap->n);
240   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
241   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
242   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
243   work = a->mult_work;
244   t    = a->sor_workt;
245   w    = a->sor_work;
246 
247   PetscCall(VecGetArray(xx, &x));
248   PetscCall(VecGetArrayRead(bb, &b));
249 
250   if (flag & SOR_ZERO_INITIAL_GUESS) {
251     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
252       switch (bs) {
253       case 1:
254         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
255         t[0] = b[0];
256         i2   = 1;
257         idiag += 1;
258         for (i = 1; i < m; i++) {
259           v    = aa + ai[i];
260           vi   = aj + ai[i];
261           nz   = diag[i] - ai[i];
262           s[0] = b[i2];
263           for (j = 0; j < nz; j++) {
264             xw[0] = x[vi[j]];
265             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
266           }
267           t[i2] = s[0];
268           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
269           x[i2] = xw[0];
270           idiag += 1;
271           i2 += 1;
272         }
273         break;
274       case 2:
275         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
276         t[0] = b[0];
277         t[1] = b[1];
278         i2   = 2;
279         idiag += 4;
280         for (i = 1; i < m; i++) {
281           v    = aa + 4 * ai[i];
282           vi   = aj + ai[i];
283           nz   = diag[i] - ai[i];
284           s[0] = b[i2];
285           s[1] = b[i2 + 1];
286           for (j = 0; j < nz; j++) {
287             idx   = 2 * vi[j];
288             it    = 4 * j;
289             xw[0] = x[idx];
290             xw[1] = x[1 + idx];
291             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
292           }
293           t[i2]     = s[0];
294           t[i2 + 1] = s[1];
295           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
296           x[i2]     = xw[0];
297           x[i2 + 1] = xw[1];
298           idiag += 4;
299           i2 += 2;
300         }
301         break;
302       case 3:
303         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
304         t[0] = b[0];
305         t[1] = b[1];
306         t[2] = b[2];
307         i2   = 3;
308         idiag += 9;
309         for (i = 1; i < m; i++) {
310           v    = aa + 9 * ai[i];
311           vi   = aj + ai[i];
312           nz   = diag[i] - ai[i];
313           s[0] = b[i2];
314           s[1] = b[i2 + 1];
315           s[2] = b[i2 + 2];
316           while (nz--) {
317             idx   = 3 * (*vi++);
318             xw[0] = x[idx];
319             xw[1] = x[1 + idx];
320             xw[2] = x[2 + idx];
321             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
322             v += 9;
323           }
324           t[i2]     = s[0];
325           t[i2 + 1] = s[1];
326           t[i2 + 2] = s[2];
327           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
328           x[i2]     = xw[0];
329           x[i2 + 1] = xw[1];
330           x[i2 + 2] = xw[2];
331           idiag += 9;
332           i2 += 3;
333         }
334         break;
335       case 4:
336         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
337         t[0] = b[0];
338         t[1] = b[1];
339         t[2] = b[2];
340         t[3] = b[3];
341         i2   = 4;
342         idiag += 16;
343         for (i = 1; i < m; i++) {
344           v    = aa + 16 * ai[i];
345           vi   = aj + ai[i];
346           nz   = diag[i] - ai[i];
347           s[0] = b[i2];
348           s[1] = b[i2 + 1];
349           s[2] = b[i2 + 2];
350           s[3] = b[i2 + 3];
351           while (nz--) {
352             idx   = 4 * (*vi++);
353             xw[0] = x[idx];
354             xw[1] = x[1 + idx];
355             xw[2] = x[2 + idx];
356             xw[3] = x[3 + idx];
357             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
358             v += 16;
359           }
360           t[i2]     = s[0];
361           t[i2 + 1] = s[1];
362           t[i2 + 2] = s[2];
363           t[i2 + 3] = s[3];
364           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
365           x[i2]     = xw[0];
366           x[i2 + 1] = xw[1];
367           x[i2 + 2] = xw[2];
368           x[i2 + 3] = xw[3];
369           idiag += 16;
370           i2 += 4;
371         }
372         break;
373       case 5:
374         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
375         t[0] = b[0];
376         t[1] = b[1];
377         t[2] = b[2];
378         t[3] = b[3];
379         t[4] = b[4];
380         i2   = 5;
381         idiag += 25;
382         for (i = 1; i < m; i++) {
383           v    = aa + 25 * ai[i];
384           vi   = aj + ai[i];
385           nz   = diag[i] - ai[i];
386           s[0] = b[i2];
387           s[1] = b[i2 + 1];
388           s[2] = b[i2 + 2];
389           s[3] = b[i2 + 3];
390           s[4] = b[i2 + 4];
391           while (nz--) {
392             idx   = 5 * (*vi++);
393             xw[0] = x[idx];
394             xw[1] = x[1 + idx];
395             xw[2] = x[2 + idx];
396             xw[3] = x[3 + idx];
397             xw[4] = x[4 + idx];
398             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
399             v += 25;
400           }
401           t[i2]     = s[0];
402           t[i2 + 1] = s[1];
403           t[i2 + 2] = s[2];
404           t[i2 + 3] = s[3];
405           t[i2 + 4] = s[4];
406           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
407           x[i2]     = xw[0];
408           x[i2 + 1] = xw[1];
409           x[i2 + 2] = xw[2];
410           x[i2 + 3] = xw[3];
411           x[i2 + 4] = xw[4];
412           idiag += 25;
413           i2 += 5;
414         }
415         break;
416       case 6:
417         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
418         t[0] = b[0];
419         t[1] = b[1];
420         t[2] = b[2];
421         t[3] = b[3];
422         t[4] = b[4];
423         t[5] = b[5];
424         i2   = 6;
425         idiag += 36;
426         for (i = 1; i < m; i++) {
427           v    = aa + 36 * ai[i];
428           vi   = aj + ai[i];
429           nz   = diag[i] - ai[i];
430           s[0] = b[i2];
431           s[1] = b[i2 + 1];
432           s[2] = b[i2 + 2];
433           s[3] = b[i2 + 3];
434           s[4] = b[i2 + 4];
435           s[5] = b[i2 + 5];
436           while (nz--) {
437             idx   = 6 * (*vi++);
438             xw[0] = x[idx];
439             xw[1] = x[1 + idx];
440             xw[2] = x[2 + idx];
441             xw[3] = x[3 + idx];
442             xw[4] = x[4 + idx];
443             xw[5] = x[5 + idx];
444             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
445             v += 36;
446           }
447           t[i2]     = s[0];
448           t[i2 + 1] = s[1];
449           t[i2 + 2] = s[2];
450           t[i2 + 3] = s[3];
451           t[i2 + 4] = s[4];
452           t[i2 + 5] = s[5];
453           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
454           x[i2]     = xw[0];
455           x[i2 + 1] = xw[1];
456           x[i2 + 2] = xw[2];
457           x[i2 + 3] = xw[3];
458           x[i2 + 4] = xw[4];
459           x[i2 + 5] = xw[5];
460           idiag += 36;
461           i2 += 6;
462         }
463         break;
464       case 7:
465         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
466         t[0] = b[0];
467         t[1] = b[1];
468         t[2] = b[2];
469         t[3] = b[3];
470         t[4] = b[4];
471         t[5] = b[5];
472         t[6] = b[6];
473         i2   = 7;
474         idiag += 49;
475         for (i = 1; i < m; i++) {
476           v    = aa + 49 * ai[i];
477           vi   = aj + ai[i];
478           nz   = diag[i] - ai[i];
479           s[0] = b[i2];
480           s[1] = b[i2 + 1];
481           s[2] = b[i2 + 2];
482           s[3] = b[i2 + 3];
483           s[4] = b[i2 + 4];
484           s[5] = b[i2 + 5];
485           s[6] = b[i2 + 6];
486           while (nz--) {
487             idx   = 7 * (*vi++);
488             xw[0] = x[idx];
489             xw[1] = x[1 + idx];
490             xw[2] = x[2 + idx];
491             xw[3] = x[3 + idx];
492             xw[4] = x[4 + idx];
493             xw[5] = x[5 + idx];
494             xw[6] = x[6 + idx];
495             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
496             v += 49;
497           }
498           t[i2]     = s[0];
499           t[i2 + 1] = s[1];
500           t[i2 + 2] = s[2];
501           t[i2 + 3] = s[3];
502           t[i2 + 4] = s[4];
503           t[i2 + 5] = s[5];
504           t[i2 + 6] = s[6];
505           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
506           x[i2]     = xw[0];
507           x[i2 + 1] = xw[1];
508           x[i2 + 2] = xw[2];
509           x[i2 + 3] = xw[3];
510           x[i2 + 4] = xw[4];
511           x[i2 + 5] = xw[5];
512           x[i2 + 6] = xw[6];
513           idiag += 49;
514           i2 += 7;
515         }
516         break;
517       default:
518         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
519         PetscCall(PetscArraycpy(t, b, bs));
520         i2 = bs;
521         idiag += bs2;
522         for (i = 1; i < m; i++) {
523           v  = aa + bs2 * ai[i];
524           vi = aj + ai[i];
525           nz = diag[i] - ai[i];
526 
527           PetscCall(PetscArraycpy(w, b + i2, bs));
528           /* copy all rows of x that are needed into contiguous space */
529           workt = work;
530           for (j = 0; j < nz; j++) {
531             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
532             workt += bs;
533           }
534           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
535           PetscCall(PetscArraycpy(t + i2, w, bs));
536           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
537 
538           idiag += bs2;
539           i2 += bs;
540         }
541         break;
542       }
543       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
544       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
545       xb = t;
546     } else xb = b;
547     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
548       idiag = a->idiag + bs2 * (a->mbs - 1);
549       i2    = bs * (m - 1);
550       switch (bs) {
551       case 1:
552         s[0] = xb[i2];
553         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
554         x[i2] = xw[0];
555         i2 -= 1;
556         for (i = m - 2; i >= 0; i--) {
557           v    = aa + (diag[i] + 1);
558           vi   = aj + diag[i] + 1;
559           nz   = ai[i + 1] - diag[i] - 1;
560           s[0] = xb[i2];
561           for (j = 0; j < nz; j++) {
562             xw[0] = x[vi[j]];
563             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
564           }
565           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
566           x[i2] = xw[0];
567           idiag -= 1;
568           i2 -= 1;
569         }
570         break;
571       case 2:
572         s[0] = xb[i2];
573         s[1] = xb[i2 + 1];
574         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
575         x[i2]     = xw[0];
576         x[i2 + 1] = xw[1];
577         i2 -= 2;
578         idiag -= 4;
579         for (i = m - 2; i >= 0; i--) {
580           v    = aa + 4 * (diag[i] + 1);
581           vi   = aj + diag[i] + 1;
582           nz   = ai[i + 1] - diag[i] - 1;
583           s[0] = xb[i2];
584           s[1] = xb[i2 + 1];
585           for (j = 0; j < nz; j++) {
586             idx   = 2 * vi[j];
587             it    = 4 * j;
588             xw[0] = x[idx];
589             xw[1] = x[1 + idx];
590             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
591           }
592           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
593           x[i2]     = xw[0];
594           x[i2 + 1] = xw[1];
595           idiag -= 4;
596           i2 -= 2;
597         }
598         break;
599       case 3:
600         s[0] = xb[i2];
601         s[1] = xb[i2 + 1];
602         s[2] = xb[i2 + 2];
603         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
604         x[i2]     = xw[0];
605         x[i2 + 1] = xw[1];
606         x[i2 + 2] = xw[2];
607         i2 -= 3;
608         idiag -= 9;
609         for (i = m - 2; i >= 0; i--) {
610           v    = aa + 9 * (diag[i] + 1);
611           vi   = aj + diag[i] + 1;
612           nz   = ai[i + 1] - diag[i] - 1;
613           s[0] = xb[i2];
614           s[1] = xb[i2 + 1];
615           s[2] = xb[i2 + 2];
616           while (nz--) {
617             idx   = 3 * (*vi++);
618             xw[0] = x[idx];
619             xw[1] = x[1 + idx];
620             xw[2] = x[2 + idx];
621             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
622             v += 9;
623           }
624           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
625           x[i2]     = xw[0];
626           x[i2 + 1] = xw[1];
627           x[i2 + 2] = xw[2];
628           idiag -= 9;
629           i2 -= 3;
630         }
631         break;
632       case 4:
633         s[0] = xb[i2];
634         s[1] = xb[i2 + 1];
635         s[2] = xb[i2 + 2];
636         s[3] = xb[i2 + 3];
637         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
638         x[i2]     = xw[0];
639         x[i2 + 1] = xw[1];
640         x[i2 + 2] = xw[2];
641         x[i2 + 3] = xw[3];
642         i2 -= 4;
643         idiag -= 16;
644         for (i = m - 2; i >= 0; i--) {
645           v    = aa + 16 * (diag[i] + 1);
646           vi   = aj + diag[i] + 1;
647           nz   = ai[i + 1] - diag[i] - 1;
648           s[0] = xb[i2];
649           s[1] = xb[i2 + 1];
650           s[2] = xb[i2 + 2];
651           s[3] = xb[i2 + 3];
652           while (nz--) {
653             idx   = 4 * (*vi++);
654             xw[0] = x[idx];
655             xw[1] = x[1 + idx];
656             xw[2] = x[2 + idx];
657             xw[3] = x[3 + idx];
658             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
659             v += 16;
660           }
661           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
662           x[i2]     = xw[0];
663           x[i2 + 1] = xw[1];
664           x[i2 + 2] = xw[2];
665           x[i2 + 3] = xw[3];
666           idiag -= 16;
667           i2 -= 4;
668         }
669         break;
670       case 5:
671         s[0] = xb[i2];
672         s[1] = xb[i2 + 1];
673         s[2] = xb[i2 + 2];
674         s[3] = xb[i2 + 3];
675         s[4] = xb[i2 + 4];
676         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
677         x[i2]     = xw[0];
678         x[i2 + 1] = xw[1];
679         x[i2 + 2] = xw[2];
680         x[i2 + 3] = xw[3];
681         x[i2 + 4] = xw[4];
682         i2 -= 5;
683         idiag -= 25;
684         for (i = m - 2; i >= 0; i--) {
685           v    = aa + 25 * (diag[i] + 1);
686           vi   = aj + diag[i] + 1;
687           nz   = ai[i + 1] - diag[i] - 1;
688           s[0] = xb[i2];
689           s[1] = xb[i2 + 1];
690           s[2] = xb[i2 + 2];
691           s[3] = xb[i2 + 3];
692           s[4] = xb[i2 + 4];
693           while (nz--) {
694             idx   = 5 * (*vi++);
695             xw[0] = x[idx];
696             xw[1] = x[1 + idx];
697             xw[2] = x[2 + idx];
698             xw[3] = x[3 + idx];
699             xw[4] = x[4 + idx];
700             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
701             v += 25;
702           }
703           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
704           x[i2]     = xw[0];
705           x[i2 + 1] = xw[1];
706           x[i2 + 2] = xw[2];
707           x[i2 + 3] = xw[3];
708           x[i2 + 4] = xw[4];
709           idiag -= 25;
710           i2 -= 5;
711         }
712         break;
713       case 6:
714         s[0] = xb[i2];
715         s[1] = xb[i2 + 1];
716         s[2] = xb[i2 + 2];
717         s[3] = xb[i2 + 3];
718         s[4] = xb[i2 + 4];
719         s[5] = xb[i2 + 5];
720         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
721         x[i2]     = xw[0];
722         x[i2 + 1] = xw[1];
723         x[i2 + 2] = xw[2];
724         x[i2 + 3] = xw[3];
725         x[i2 + 4] = xw[4];
726         x[i2 + 5] = xw[5];
727         i2 -= 6;
728         idiag -= 36;
729         for (i = m - 2; i >= 0; i--) {
730           v    = aa + 36 * (diag[i] + 1);
731           vi   = aj + diag[i] + 1;
732           nz   = ai[i + 1] - diag[i] - 1;
733           s[0] = xb[i2];
734           s[1] = xb[i2 + 1];
735           s[2] = xb[i2 + 2];
736           s[3] = xb[i2 + 3];
737           s[4] = xb[i2 + 4];
738           s[5] = xb[i2 + 5];
739           while (nz--) {
740             idx   = 6 * (*vi++);
741             xw[0] = x[idx];
742             xw[1] = x[1 + idx];
743             xw[2] = x[2 + idx];
744             xw[3] = x[3 + idx];
745             xw[4] = x[4 + idx];
746             xw[5] = x[5 + idx];
747             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
748             v += 36;
749           }
750           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
751           x[i2]     = xw[0];
752           x[i2 + 1] = xw[1];
753           x[i2 + 2] = xw[2];
754           x[i2 + 3] = xw[3];
755           x[i2 + 4] = xw[4];
756           x[i2 + 5] = xw[5];
757           idiag -= 36;
758           i2 -= 6;
759         }
760         break;
761       case 7:
762         s[0] = xb[i2];
763         s[1] = xb[i2 + 1];
764         s[2] = xb[i2 + 2];
765         s[3] = xb[i2 + 3];
766         s[4] = xb[i2 + 4];
767         s[5] = xb[i2 + 5];
768         s[6] = xb[i2 + 6];
769         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
770         x[i2]     = xw[0];
771         x[i2 + 1] = xw[1];
772         x[i2 + 2] = xw[2];
773         x[i2 + 3] = xw[3];
774         x[i2 + 4] = xw[4];
775         x[i2 + 5] = xw[5];
776         x[i2 + 6] = xw[6];
777         i2 -= 7;
778         idiag -= 49;
779         for (i = m - 2; i >= 0; i--) {
780           v    = aa + 49 * (diag[i] + 1);
781           vi   = aj + diag[i] + 1;
782           nz   = ai[i + 1] - diag[i] - 1;
783           s[0] = xb[i2];
784           s[1] = xb[i2 + 1];
785           s[2] = xb[i2 + 2];
786           s[3] = xb[i2 + 3];
787           s[4] = xb[i2 + 4];
788           s[5] = xb[i2 + 5];
789           s[6] = xb[i2 + 6];
790           while (nz--) {
791             idx   = 7 * (*vi++);
792             xw[0] = x[idx];
793             xw[1] = x[1 + idx];
794             xw[2] = x[2 + idx];
795             xw[3] = x[3 + idx];
796             xw[4] = x[4 + idx];
797             xw[5] = x[5 + idx];
798             xw[6] = x[6 + idx];
799             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
800             v += 49;
801           }
802           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
803           x[i2]     = xw[0];
804           x[i2 + 1] = xw[1];
805           x[i2 + 2] = xw[2];
806           x[i2 + 3] = xw[3];
807           x[i2 + 4] = xw[4];
808           x[i2 + 5] = xw[5];
809           x[i2 + 6] = xw[6];
810           idiag -= 49;
811           i2 -= 7;
812         }
813         break;
814       default:
815         PetscCall(PetscArraycpy(w, xb + i2, bs));
816         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
817         i2 -= bs;
818         idiag -= bs2;
819         for (i = m - 2; i >= 0; i--) {
820           v  = aa + bs2 * (diag[i] + 1);
821           vi = aj + diag[i] + 1;
822           nz = ai[i + 1] - diag[i] - 1;
823 
824           PetscCall(PetscArraycpy(w, xb + i2, bs));
825           /* copy all rows of x that are needed into contiguous space */
826           workt = work;
827           for (j = 0; j < nz; j++) {
828             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
829             workt += bs;
830           }
831           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
832           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
833 
834           idiag -= bs2;
835           i2 -= bs;
836         }
837         break;
838       }
839       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
840     }
841     its--;
842   }
843   while (its--) {
844     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
845       idiag = a->idiag;
846       i2    = 0;
847       switch (bs) {
848       case 1:
849         for (i = 0; i < m; i++) {
850           v    = aa + ai[i];
851           vi   = aj + ai[i];
852           nz   = ai[i + 1] - ai[i];
853           s[0] = b[i2];
854           for (j = 0; j < nz; j++) {
855             xw[0] = x[vi[j]];
856             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
857           }
858           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
859           x[i2] += xw[0];
860           idiag += 1;
861           i2 += 1;
862         }
863         break;
864       case 2:
865         for (i = 0; i < m; i++) {
866           v    = aa + 4 * ai[i];
867           vi   = aj + ai[i];
868           nz   = ai[i + 1] - ai[i];
869           s[0] = b[i2];
870           s[1] = b[i2 + 1];
871           for (j = 0; j < nz; j++) {
872             idx   = 2 * vi[j];
873             it    = 4 * j;
874             xw[0] = x[idx];
875             xw[1] = x[1 + idx];
876             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
877           }
878           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
879           x[i2] += xw[0];
880           x[i2 + 1] += xw[1];
881           idiag += 4;
882           i2 += 2;
883         }
884         break;
885       case 3:
886         for (i = 0; i < m; i++) {
887           v    = aa + 9 * ai[i];
888           vi   = aj + ai[i];
889           nz   = ai[i + 1] - ai[i];
890           s[0] = b[i2];
891           s[1] = b[i2 + 1];
892           s[2] = b[i2 + 2];
893           while (nz--) {
894             idx   = 3 * (*vi++);
895             xw[0] = x[idx];
896             xw[1] = x[1 + idx];
897             xw[2] = x[2 + idx];
898             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
899             v += 9;
900           }
901           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
902           x[i2] += xw[0];
903           x[i2 + 1] += xw[1];
904           x[i2 + 2] += xw[2];
905           idiag += 9;
906           i2 += 3;
907         }
908         break;
909       case 4:
910         for (i = 0; i < m; i++) {
911           v    = aa + 16 * ai[i];
912           vi   = aj + ai[i];
913           nz   = ai[i + 1] - ai[i];
914           s[0] = b[i2];
915           s[1] = b[i2 + 1];
916           s[2] = b[i2 + 2];
917           s[3] = b[i2 + 3];
918           while (nz--) {
919             idx   = 4 * (*vi++);
920             xw[0] = x[idx];
921             xw[1] = x[1 + idx];
922             xw[2] = x[2 + idx];
923             xw[3] = x[3 + idx];
924             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
925             v += 16;
926           }
927           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
928           x[i2] += xw[0];
929           x[i2 + 1] += xw[1];
930           x[i2 + 2] += xw[2];
931           x[i2 + 3] += xw[3];
932           idiag += 16;
933           i2 += 4;
934         }
935         break;
936       case 5:
937         for (i = 0; i < m; i++) {
938           v    = aa + 25 * ai[i];
939           vi   = aj + ai[i];
940           nz   = ai[i + 1] - ai[i];
941           s[0] = b[i2];
942           s[1] = b[i2 + 1];
943           s[2] = b[i2 + 2];
944           s[3] = b[i2 + 3];
945           s[4] = b[i2 + 4];
946           while (nz--) {
947             idx   = 5 * (*vi++);
948             xw[0] = x[idx];
949             xw[1] = x[1 + idx];
950             xw[2] = x[2 + idx];
951             xw[3] = x[3 + idx];
952             xw[4] = x[4 + idx];
953             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
954             v += 25;
955           }
956           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
957           x[i2] += xw[0];
958           x[i2 + 1] += xw[1];
959           x[i2 + 2] += xw[2];
960           x[i2 + 3] += xw[3];
961           x[i2 + 4] += xw[4];
962           idiag += 25;
963           i2 += 5;
964         }
965         break;
966       case 6:
967         for (i = 0; i < m; i++) {
968           v    = aa + 36 * ai[i];
969           vi   = aj + ai[i];
970           nz   = ai[i + 1] - ai[i];
971           s[0] = b[i2];
972           s[1] = b[i2 + 1];
973           s[2] = b[i2 + 2];
974           s[3] = b[i2 + 3];
975           s[4] = b[i2 + 4];
976           s[5] = b[i2 + 5];
977           while (nz--) {
978             idx   = 6 * (*vi++);
979             xw[0] = x[idx];
980             xw[1] = x[1 + idx];
981             xw[2] = x[2 + idx];
982             xw[3] = x[3 + idx];
983             xw[4] = x[4 + idx];
984             xw[5] = x[5 + idx];
985             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
986             v += 36;
987           }
988           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
989           x[i2] += xw[0];
990           x[i2 + 1] += xw[1];
991           x[i2 + 2] += xw[2];
992           x[i2 + 3] += xw[3];
993           x[i2 + 4] += xw[4];
994           x[i2 + 5] += xw[5];
995           idiag += 36;
996           i2 += 6;
997         }
998         break;
999       case 7:
1000         for (i = 0; i < m; i++) {
1001           v    = aa + 49 * ai[i];
1002           vi   = aj + ai[i];
1003           nz   = ai[i + 1] - ai[i];
1004           s[0] = b[i2];
1005           s[1] = b[i2 + 1];
1006           s[2] = b[i2 + 2];
1007           s[3] = b[i2 + 3];
1008           s[4] = b[i2 + 4];
1009           s[5] = b[i2 + 5];
1010           s[6] = b[i2 + 6];
1011           while (nz--) {
1012             idx   = 7 * (*vi++);
1013             xw[0] = x[idx];
1014             xw[1] = x[1 + idx];
1015             xw[2] = x[2 + idx];
1016             xw[3] = x[3 + idx];
1017             xw[4] = x[4 + idx];
1018             xw[5] = x[5 + idx];
1019             xw[6] = x[6 + idx];
1020             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1021             v += 49;
1022           }
1023           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1024           x[i2] += xw[0];
1025           x[i2 + 1] += xw[1];
1026           x[i2 + 2] += xw[2];
1027           x[i2 + 3] += xw[3];
1028           x[i2 + 4] += xw[4];
1029           x[i2 + 5] += xw[5];
1030           x[i2 + 6] += xw[6];
1031           idiag += 49;
1032           i2 += 7;
1033         }
1034         break;
1035       default:
1036         for (i = 0; i < m; i++) {
1037           v  = aa + bs2 * ai[i];
1038           vi = aj + ai[i];
1039           nz = ai[i + 1] - ai[i];
1040 
1041           PetscCall(PetscArraycpy(w, b + i2, bs));
1042           /* copy all rows of x that are needed into contiguous space */
1043           workt = work;
1044           for (j = 0; j < nz; j++) {
1045             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1046             workt += bs;
1047           }
1048           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1049           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1050 
1051           idiag += bs2;
1052           i2 += bs;
1053         }
1054         break;
1055       }
1056       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1057     }
1058     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1059       idiag = a->idiag + bs2 * (a->mbs - 1);
1060       i2    = bs * (m - 1);
1061       switch (bs) {
1062       case 1:
1063         for (i = m - 1; i >= 0; i--) {
1064           v    = aa + ai[i];
1065           vi   = aj + ai[i];
1066           nz   = ai[i + 1] - ai[i];
1067           s[0] = b[i2];
1068           for (j = 0; j < nz; j++) {
1069             xw[0] = x[vi[j]];
1070             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1071           }
1072           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1073           x[i2] += xw[0];
1074           idiag -= 1;
1075           i2 -= 1;
1076         }
1077         break;
1078       case 2:
1079         for (i = m - 1; i >= 0; i--) {
1080           v    = aa + 4 * ai[i];
1081           vi   = aj + ai[i];
1082           nz   = ai[i + 1] - ai[i];
1083           s[0] = b[i2];
1084           s[1] = b[i2 + 1];
1085           for (j = 0; j < nz; j++) {
1086             idx   = 2 * vi[j];
1087             it    = 4 * j;
1088             xw[0] = x[idx];
1089             xw[1] = x[1 + idx];
1090             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1091           }
1092           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
1093           x[i2] += xw[0];
1094           x[i2 + 1] += xw[1];
1095           idiag -= 4;
1096           i2 -= 2;
1097         }
1098         break;
1099       case 3:
1100         for (i = m - 1; i >= 0; i--) {
1101           v    = aa + 9 * ai[i];
1102           vi   = aj + ai[i];
1103           nz   = ai[i + 1] - ai[i];
1104           s[0] = b[i2];
1105           s[1] = b[i2 + 1];
1106           s[2] = b[i2 + 2];
1107           while (nz--) {
1108             idx   = 3 * (*vi++);
1109             xw[0] = x[idx];
1110             xw[1] = x[1 + idx];
1111             xw[2] = x[2 + idx];
1112             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1113             v += 9;
1114           }
1115           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
1116           x[i2] += xw[0];
1117           x[i2 + 1] += xw[1];
1118           x[i2 + 2] += xw[2];
1119           idiag -= 9;
1120           i2 -= 3;
1121         }
1122         break;
1123       case 4:
1124         for (i = m - 1; i >= 0; i--) {
1125           v    = aa + 16 * ai[i];
1126           vi   = aj + ai[i];
1127           nz   = ai[i + 1] - ai[i];
1128           s[0] = b[i2];
1129           s[1] = b[i2 + 1];
1130           s[2] = b[i2 + 2];
1131           s[3] = b[i2 + 3];
1132           while (nz--) {
1133             idx   = 4 * (*vi++);
1134             xw[0] = x[idx];
1135             xw[1] = x[1 + idx];
1136             xw[2] = x[2 + idx];
1137             xw[3] = x[3 + idx];
1138             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1139             v += 16;
1140           }
1141           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
1142           x[i2] += xw[0];
1143           x[i2 + 1] += xw[1];
1144           x[i2 + 2] += xw[2];
1145           x[i2 + 3] += xw[3];
1146           idiag -= 16;
1147           i2 -= 4;
1148         }
1149         break;
1150       case 5:
1151         for (i = m - 1; i >= 0; i--) {
1152           v    = aa + 25 * ai[i];
1153           vi   = aj + ai[i];
1154           nz   = ai[i + 1] - ai[i];
1155           s[0] = b[i2];
1156           s[1] = b[i2 + 1];
1157           s[2] = b[i2 + 2];
1158           s[3] = b[i2 + 3];
1159           s[4] = b[i2 + 4];
1160           while (nz--) {
1161             idx   = 5 * (*vi++);
1162             xw[0] = x[idx];
1163             xw[1] = x[1 + idx];
1164             xw[2] = x[2 + idx];
1165             xw[3] = x[3 + idx];
1166             xw[4] = x[4 + idx];
1167             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1168             v += 25;
1169           }
1170           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
1171           x[i2] += xw[0];
1172           x[i2 + 1] += xw[1];
1173           x[i2 + 2] += xw[2];
1174           x[i2 + 3] += xw[3];
1175           x[i2 + 4] += xw[4];
1176           idiag -= 25;
1177           i2 -= 5;
1178         }
1179         break;
1180       case 6:
1181         for (i = m - 1; i >= 0; i--) {
1182           v    = aa + 36 * ai[i];
1183           vi   = aj + ai[i];
1184           nz   = ai[i + 1] - ai[i];
1185           s[0] = b[i2];
1186           s[1] = b[i2 + 1];
1187           s[2] = b[i2 + 2];
1188           s[3] = b[i2 + 3];
1189           s[4] = b[i2 + 4];
1190           s[5] = b[i2 + 5];
1191           while (nz--) {
1192             idx   = 6 * (*vi++);
1193             xw[0] = x[idx];
1194             xw[1] = x[1 + idx];
1195             xw[2] = x[2 + idx];
1196             xw[3] = x[3 + idx];
1197             xw[4] = x[4 + idx];
1198             xw[5] = x[5 + idx];
1199             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1200             v += 36;
1201           }
1202           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
1203           x[i2] += xw[0];
1204           x[i2 + 1] += xw[1];
1205           x[i2 + 2] += xw[2];
1206           x[i2 + 3] += xw[3];
1207           x[i2 + 4] += xw[4];
1208           x[i2 + 5] += xw[5];
1209           idiag -= 36;
1210           i2 -= 6;
1211         }
1212         break;
1213       case 7:
1214         for (i = m - 1; i >= 0; i--) {
1215           v    = aa + 49 * ai[i];
1216           vi   = aj + ai[i];
1217           nz   = ai[i + 1] - ai[i];
1218           s[0] = b[i2];
1219           s[1] = b[i2 + 1];
1220           s[2] = b[i2 + 2];
1221           s[3] = b[i2 + 3];
1222           s[4] = b[i2 + 4];
1223           s[5] = b[i2 + 5];
1224           s[6] = b[i2 + 6];
1225           while (nz--) {
1226             idx   = 7 * (*vi++);
1227             xw[0] = x[idx];
1228             xw[1] = x[1 + idx];
1229             xw[2] = x[2 + idx];
1230             xw[3] = x[3 + idx];
1231             xw[4] = x[4 + idx];
1232             xw[5] = x[5 + idx];
1233             xw[6] = x[6 + idx];
1234             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1235             v += 49;
1236           }
1237           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1238           x[i2] += xw[0];
1239           x[i2 + 1] += xw[1];
1240           x[i2 + 2] += xw[2];
1241           x[i2 + 3] += xw[3];
1242           x[i2 + 4] += xw[4];
1243           x[i2 + 5] += xw[5];
1244           x[i2 + 6] += xw[6];
1245           idiag -= 49;
1246           i2 -= 7;
1247         }
1248         break;
1249       default:
1250         for (i = m - 1; i >= 0; i--) {
1251           v  = aa + bs2 * ai[i];
1252           vi = aj + ai[i];
1253           nz = ai[i + 1] - ai[i];
1254 
1255           PetscCall(PetscArraycpy(w, b + i2, bs));
1256           /* copy all rows of x that are needed into contiguous space */
1257           workt = work;
1258           for (j = 0; j < nz; j++) {
1259             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1260             workt += bs;
1261           }
1262           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1263           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1264 
1265           idiag -= bs2;
1266           i2 -= bs;
1267         }
1268         break;
1269       }
1270       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1271     }
1272   }
1273   PetscCall(VecRestoreArray(xx, &x));
1274   PetscCall(VecRestoreArrayRead(bb, &b));
1275   PetscFunctionReturn(PETSC_SUCCESS);
1276 }
1277 
1278 /*
1279     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1280 */
1281 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1282   #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1283 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1284   #define matsetvaluesblocked4_ matsetvaluesblocked4
1285 #endif
1286 
1287 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1288 {
1289   Mat                A = *AA;
1290   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1291   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1292   PetscInt          *ai = a->i, *ailen = a->ilen;
1293   PetscInt          *aj = a->j, stepval, lastcol = -1;
1294   const PetscScalar *value = v;
1295   MatScalar         *ap, *aa = a->a, *bap;
1296 
1297   PetscFunctionBegin;
1298   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1299   stepval = (n - 1) * 4;
1300   for (k = 0; k < m; k++) { /* loop over added rows */
1301     row  = im[k];
1302     rp   = aj + ai[row];
1303     ap   = aa + 16 * ai[row];
1304     nrow = ailen[row];
1305     low  = 0;
1306     high = nrow;
1307     for (l = 0; l < n; l++) { /* loop over added columns */
1308       col = in[l];
1309       if (col <= lastcol) low = 0;
1310       else high = nrow;
1311       lastcol = col;
1312       value   = v + k * (stepval + 4 + l) * 4;
1313       while (high - low > 7) {
1314         t = (low + high) / 2;
1315         if (rp[t] > col) high = t;
1316         else low = t;
1317       }
1318       for (i = low; i < high; i++) {
1319         if (rp[i] > col) break;
1320         if (rp[i] == col) {
1321           bap = ap + 16 * i;
1322           for (ii = 0; ii < 4; ii++, value += stepval) {
1323             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1324           }
1325           goto noinsert2;
1326         }
1327       }
1328       N = nrow++ - 1;
1329       high++; /* added new column index thus must search to one higher than before */
1330       /* shift up all the later entries in this row */
1331       for (ii = N; ii >= i; ii--) {
1332         rp[ii + 1] = rp[ii];
1333         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1334       }
1335       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1336       rp[i] = col;
1337       bap   = ap + 16 * i;
1338       for (ii = 0; ii < 4; ii++, value += stepval) {
1339         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1340       }
1341     noinsert2:;
1342       low = i;
1343     }
1344     ailen[row] = nrow;
1345   }
1346   PetscFunctionReturnVoid();
1347 }
1348 
1349 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1350   #define matsetvalues4_ MATSETVALUES4
1351 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1352   #define matsetvalues4_ matsetvalues4
1353 #endif
1354 
1355 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1356 {
1357   Mat          A = *AA;
1358   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1359   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1360   PetscInt    *ai = a->i, *ailen = a->ilen;
1361   PetscInt    *aj = a->j, brow, bcol;
1362   PetscInt     ridx, cidx, lastcol = -1;
1363   MatScalar   *ap, value, *aa      = a->a, *bap;
1364 
1365   PetscFunctionBegin;
1366   for (k = 0; k < m; k++) { /* loop over added rows */
1367     row  = im[k];
1368     brow = row / 4;
1369     rp   = aj + ai[brow];
1370     ap   = aa + 16 * ai[brow];
1371     nrow = ailen[brow];
1372     low  = 0;
1373     high = nrow;
1374     for (l = 0; l < n; l++) { /* loop over added columns */
1375       col   = in[l];
1376       bcol  = col / 4;
1377       ridx  = row % 4;
1378       cidx  = col % 4;
1379       value = v[l + k * n];
1380       if (col <= lastcol) low = 0;
1381       else high = nrow;
1382       lastcol = col;
1383       while (high - low > 7) {
1384         t = (low + high) / 2;
1385         if (rp[t] > bcol) high = t;
1386         else low = t;
1387       }
1388       for (i = low; i < high; i++) {
1389         if (rp[i] > bcol) break;
1390         if (rp[i] == bcol) {
1391           bap = ap + 16 * i + 4 * cidx + ridx;
1392           *bap += value;
1393           goto noinsert1;
1394         }
1395       }
1396       N = nrow++ - 1;
1397       high++; /* added new column thus must search to one higher than before */
1398       /* shift up all the later entries in this row */
1399       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
1400       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
1401       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1402       rp[i]                        = bcol;
1403       ap[16 * i + 4 * cidx + ridx] = value;
1404     noinsert1:;
1405       low = i;
1406     }
1407     ailen[brow] = nrow;
1408   }
1409   PetscFunctionReturnVoid();
1410 }
1411 
1412 /*
1413      Checks for missing diagonals
1414 */
1415 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1416 {
1417   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1418   PetscInt    *diag, *ii = a->i, i;
1419 
1420   PetscFunctionBegin;
1421   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
1422   *missing = PETSC_FALSE;
1423   if (A->rmap->n > 0 && !ii) {
1424     *missing = PETSC_TRUE;
1425     if (d) *d = 0;
1426     PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
1427   } else {
1428     PetscInt n;
1429     n    = PetscMin(a->mbs, a->nbs);
1430     diag = a->diag;
1431     for (i = 0; i < n; i++) {
1432       if (diag[i] >= ii[i + 1]) {
1433         *missing = PETSC_TRUE;
1434         if (d) *d = i;
1435         PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i));
1436         break;
1437       }
1438     }
1439   }
1440   PetscFunctionReturn(PETSC_SUCCESS);
1441 }
1442 
1443 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1444 {
1445   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1446   PetscInt     i, j, m = a->mbs;
1447 
1448   PetscFunctionBegin;
1449   if (!a->diag) {
1450     PetscCall(PetscMalloc1(m, &a->diag));
1451     a->free_diag = PETSC_TRUE;
1452   }
1453   for (i = 0; i < m; i++) {
1454     a->diag[i] = a->i[i + 1];
1455     for (j = a->i[i]; j < a->i[i + 1]; j++) {
1456       if (a->j[j] == i) {
1457         a->diag[i] = j;
1458         break;
1459       }
1460     }
1461   }
1462   PetscFunctionReturn(PETSC_SUCCESS);
1463 }
1464 
1465 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1466 {
1467   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1468   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
1469   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
1470 
1471   PetscFunctionBegin;
1472   *nn = n;
1473   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1474   if (symmetric) {
1475     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1476     nz = tia[n];
1477   } else {
1478     tia = a->i;
1479     tja = a->j;
1480   }
1481 
1482   if (!blockcompressed && bs > 1) {
1483     (*nn) *= bs;
1484     /* malloc & create the natural set of indices */
1485     PetscCall(PetscMalloc1((n + 1) * bs, ia));
1486     if (n) {
1487       (*ia)[0] = oshift;
1488       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
1489     }
1490 
1491     for (i = 1; i < n; i++) {
1492       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1493       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
1494     }
1495     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1496 
1497     if (inja) {
1498       PetscCall(PetscMalloc1(nz * bs * bs, ja));
1499       cnt = 0;
1500       for (i = 0; i < n; i++) {
1501         for (j = 0; j < bs; j++) {
1502           for (k = tia[i]; k < tia[i + 1]; k++) {
1503             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
1504           }
1505         }
1506       }
1507     }
1508 
1509     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1510       PetscCall(PetscFree(tia));
1511       PetscCall(PetscFree(tja));
1512     }
1513   } else if (oshift == 1) {
1514     if (symmetric) {
1515       nz = tia[A->rmap->n / bs];
1516       /*  add 1 to i and j indices */
1517       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1518       *ia = tia;
1519       if (ja) {
1520         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1521         *ja = tja;
1522       }
1523     } else {
1524       nz = a->i[A->rmap->n / bs];
1525       /* malloc space and  add 1 to i and j indices */
1526       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1527       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1528       if (ja) {
1529         PetscCall(PetscMalloc1(nz, ja));
1530         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1531       }
1532     }
1533   } else {
1534     *ia = tia;
1535     if (ja) *ja = tja;
1536   }
1537   PetscFunctionReturn(PETSC_SUCCESS);
1538 }
1539 
1540 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1541 {
1542   PetscFunctionBegin;
1543   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1544   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1545     PetscCall(PetscFree(*ia));
1546     if (ja) PetscCall(PetscFree(*ja));
1547   }
1548   PetscFunctionReturn(PETSC_SUCCESS);
1549 }
1550 
1551 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1552 {
1553   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1554 
1555   PetscFunctionBegin;
1556   if (A->hash_active) {
1557     PetscInt bs;
1558     A->ops[0] = a->cops;
1559     PetscCall(PetscHMapIJVDestroy(&a->ht));
1560     PetscCall(MatGetBlockSize(A, &bs));
1561     if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht));
1562     PetscCall(PetscFree(a->dnz));
1563     PetscCall(PetscFree(a->bdnz));
1564     A->hash_active = PETSC_FALSE;
1565   }
1566   PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
1567   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
1568   PetscCall(ISDestroy(&a->row));
1569   PetscCall(ISDestroy(&a->col));
1570   if (a->free_diag) PetscCall(PetscFree(a->diag));
1571   PetscCall(PetscFree(a->idiag));
1572   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
1573   PetscCall(PetscFree(a->solve_work));
1574   PetscCall(PetscFree(a->mult_work));
1575   PetscCall(PetscFree(a->sor_workt));
1576   PetscCall(PetscFree(a->sor_work));
1577   PetscCall(ISDestroy(&a->icol));
1578   PetscCall(PetscFree(a->saved_values));
1579   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1580 
1581   PetscCall(MatDestroy(&a->sbaijMat));
1582   PetscCall(MatDestroy(&a->parent));
1583   PetscCall(PetscFree(A->data));
1584 
1585   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
1586   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
1587   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
1588   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
1589   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
1590   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
1591   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
1592   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
1593   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
1594   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
1595   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
1596   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
1597 #if defined(PETSC_HAVE_HYPRE)
1598   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
1599 #endif
1600   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
1601   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1602   PetscFunctionReturn(PETSC_SUCCESS);
1603 }
1604 
1605 static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1606 {
1607   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1608 
1609   PetscFunctionBegin;
1610   switch (op) {
1611   case MAT_ROW_ORIENTED:
1612     a->roworiented = flg;
1613     break;
1614   case MAT_KEEP_NONZERO_PATTERN:
1615     a->keepnonzeropattern = flg;
1616     break;
1617   case MAT_NEW_NONZERO_LOCATIONS:
1618     a->nonew = (flg ? 0 : 1);
1619     break;
1620   case MAT_NEW_NONZERO_LOCATION_ERR:
1621     a->nonew = (flg ? -1 : 0);
1622     break;
1623   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1624     a->nonew = (flg ? -2 : 0);
1625     break;
1626   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1627     a->nounused = (flg ? -1 : 0);
1628     break;
1629   default:
1630     break;
1631   }
1632   PetscFunctionReturn(PETSC_SUCCESS);
1633 }
1634 
1635 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1636 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1637 {
1638   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
1639   MatScalar   *aa_i;
1640   PetscScalar *v_i;
1641 
1642   PetscFunctionBegin;
1643   bs  = A->rmap->bs;
1644   bs2 = bs * bs;
1645   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1646 
1647   bn  = row / bs; /* Block number */
1648   bp  = row % bs; /* Block Position */
1649   M   = ai[bn + 1] - ai[bn];
1650   *nz = bs * M;
1651 
1652   if (v) {
1653     *v = NULL;
1654     if (*nz) {
1655       PetscCall(PetscMalloc1(*nz, v));
1656       for (i = 0; i < M; i++) { /* for each block in the block row */
1657         v_i  = *v + i * bs;
1658         aa_i = aa + bs2 * (ai[bn] + i);
1659         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
1660       }
1661     }
1662   }
1663 
1664   if (idx) {
1665     *idx = NULL;
1666     if (*nz) {
1667       PetscCall(PetscMalloc1(*nz, idx));
1668       for (i = 0; i < M; i++) { /* for each block in the block row */
1669         idx_i = *idx + i * bs;
1670         itmp  = bs * aj[ai[bn] + i];
1671         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
1672       }
1673     }
1674   }
1675   PetscFunctionReturn(PETSC_SUCCESS);
1676 }
1677 
1678 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1679 {
1680   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1681 
1682   PetscFunctionBegin;
1683   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
1684   PetscFunctionReturn(PETSC_SUCCESS);
1685 }
1686 
1687 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1688 {
1689   PetscFunctionBegin;
1690   if (idx) PetscCall(PetscFree(*idx));
1691   if (v) PetscCall(PetscFree(*v));
1692   PetscFunctionReturn(PETSC_SUCCESS);
1693 }
1694 
1695 static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1696 {
1697   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
1698   Mat          C;
1699   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
1700   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
1701   MatScalar   *ata, *aa = a->a;
1702 
1703   PetscFunctionBegin;
1704   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1705   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1706   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1707     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1708 
1709     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
1710     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
1711     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
1712     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
1713 
1714     at  = (Mat_SeqBAIJ *)C->data;
1715     ati = at->i;
1716     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1717   } else {
1718     C   = *B;
1719     at  = (Mat_SeqBAIJ *)C->data;
1720     ati = at->i;
1721   }
1722 
1723   atj = at->j;
1724   ata = at->a;
1725 
1726   /* Copy ati into atfill so we have locations of the next free space in atj */
1727   PetscCall(PetscArraycpy(atfill, ati, nbs));
1728 
1729   /* Walk through A row-wise and mark nonzero entries of A^T. */
1730   for (i = 0; i < mbs; i++) {
1731     anzj = ai[i + 1] - ai[i];
1732     for (j = 0; j < anzj; j++) {
1733       atj[atfill[*aj]] = i;
1734       for (kr = 0; kr < bs; kr++) {
1735         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
1736       }
1737       atfill[*aj++] += 1;
1738     }
1739   }
1740   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1741   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1742 
1743   /* Clean up temporary space and complete requests. */
1744   PetscCall(PetscFree(atfill));
1745 
1746   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1747     PetscCall(MatSetBlockSizes(C, A->cmap->bs, A->rmap->bs));
1748     *B = C;
1749   } else {
1750     PetscCall(MatHeaderMerge(A, &C));
1751   }
1752   PetscFunctionReturn(PETSC_SUCCESS);
1753 }
1754 
1755 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1756 {
1757   Mat Btrans;
1758 
1759   PetscFunctionBegin;
1760   *f = PETSC_FALSE;
1761   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
1762   PetscCall(MatEqual_SeqBAIJ(B, Btrans, f));
1763   PetscCall(MatDestroy(&Btrans));
1764   PetscFunctionReturn(PETSC_SUCCESS);
1765 }
1766 
1767 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1768 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1769 {
1770   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1771   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1772   PetscInt    *rowlens, *colidxs;
1773   PetscScalar *matvals;
1774 
1775   PetscFunctionBegin;
1776   PetscCall(PetscViewerSetUp(viewer));
1777 
1778   M  = mat->rmap->N;
1779   N  = mat->cmap->N;
1780   m  = mat->rmap->n;
1781   bs = mat->rmap->bs;
1782   nz = bs * bs * A->nz;
1783 
1784   /* write matrix header */
1785   header[0] = MAT_FILE_CLASSID;
1786   header[1] = M;
1787   header[2] = N;
1788   header[3] = nz;
1789   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1790 
1791   /* store row lengths */
1792   PetscCall(PetscMalloc1(m, &rowlens));
1793   for (cnt = 0, i = 0; i < A->mbs; i++)
1794     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
1795   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
1796   PetscCall(PetscFree(rowlens));
1797 
1798   /* store column indices  */
1799   PetscCall(PetscMalloc1(nz, &colidxs));
1800   for (cnt = 0, i = 0; i < A->mbs; i++)
1801     for (k = 0; k < bs; k++)
1802       for (j = A->i[i]; j < A->i[i + 1]; j++)
1803         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
1804   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1805   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
1806   PetscCall(PetscFree(colidxs));
1807 
1808   /* store nonzero values */
1809   PetscCall(PetscMalloc1(nz, &matvals));
1810   for (cnt = 0, i = 0; i < A->mbs; i++)
1811     for (k = 0; k < bs; k++)
1812       for (j = A->i[i]; j < A->i[i + 1]; j++)
1813         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
1814   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1815   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
1816   PetscCall(PetscFree(matvals));
1817 
1818   /* write block size option to the viewer's .info file */
1819   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1820   PetscFunctionReturn(PETSC_SUCCESS);
1821 }
1822 
1823 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1824 {
1825   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1826   PetscInt     i, bs = A->rmap->bs, k;
1827 
1828   PetscFunctionBegin;
1829   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1830   for (i = 0; i < a->mbs; i++) {
1831     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
1832     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
1833     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1834   }
1835   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1836   PetscFunctionReturn(PETSC_SUCCESS);
1837 }
1838 
1839 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1840 {
1841   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1842   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1843   PetscViewerFormat format;
1844 
1845   PetscFunctionBegin;
1846   if (A->structure_only) {
1847     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
1848     PetscFunctionReturn(PETSC_SUCCESS);
1849   }
1850 
1851   PetscCall(PetscViewerGetFormat(viewer, &format));
1852   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1853     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1854   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1855     const char *matname;
1856     Mat         aij;
1857     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
1858     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
1859     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
1860     PetscCall(MatView(aij, viewer));
1861     PetscCall(MatDestroy(&aij));
1862   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1863     PetscFunctionReturn(PETSC_SUCCESS);
1864   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1865     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1866     for (i = 0; i < a->mbs; i++) {
1867       for (j = 0; j < bs; j++) {
1868         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1869         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1870           for (l = 0; l < bs; l++) {
1871 #if defined(PETSC_USE_COMPLEX)
1872             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1873               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1874             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1875               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1876             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1877               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1878             }
1879 #else
1880             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1881 #endif
1882           }
1883         }
1884         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1885       }
1886     }
1887     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1888   } else {
1889     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1890     for (i = 0; i < a->mbs; i++) {
1891       for (j = 0; j < bs; j++) {
1892         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1893         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1894           for (l = 0; l < bs; l++) {
1895 #if defined(PETSC_USE_COMPLEX)
1896             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
1897               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1898             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
1899               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1900             } else {
1901               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1902             }
1903 #else
1904             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1905 #endif
1906           }
1907         }
1908         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1909       }
1910     }
1911     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1912   }
1913   PetscCall(PetscViewerFlush(viewer));
1914   PetscFunctionReturn(PETSC_SUCCESS);
1915 }
1916 
1917 #include <petscdraw.h>
1918 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1919 {
1920   Mat               A = (Mat)Aa;
1921   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1922   PetscInt          row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
1923   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1924   MatScalar        *aa;
1925   PetscViewer       viewer;
1926   PetscViewerFormat format;
1927   int               color;
1928 
1929   PetscFunctionBegin;
1930   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1931   PetscCall(PetscViewerGetFormat(viewer, &format));
1932   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
1933 
1934   /* loop over matrix elements drawing boxes */
1935 
1936   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1937     PetscDrawCollectiveBegin(draw);
1938     /* Blue for negative, Cyan for zero and  Red for positive */
1939     color = PETSC_DRAW_BLUE;
1940     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1941       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1942         y_l = A->rmap->N - row - 1.0;
1943         y_r = y_l + 1.0;
1944         x_l = a->j[j] * bs;
1945         x_r = x_l + 1.0;
1946         aa  = a->a + j * bs2;
1947         for (k = 0; k < bs; k++) {
1948           for (l = 0; l < bs; l++) {
1949             if (PetscRealPart(*aa++) >= 0.) continue;
1950             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1951           }
1952         }
1953       }
1954     }
1955     color = PETSC_DRAW_CYAN;
1956     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1957       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1958         y_l = A->rmap->N - row - 1.0;
1959         y_r = y_l + 1.0;
1960         x_l = a->j[j] * bs;
1961         x_r = x_l + 1.0;
1962         aa  = a->a + j * bs2;
1963         for (k = 0; k < bs; k++) {
1964           for (l = 0; l < bs; l++) {
1965             if (PetscRealPart(*aa++) != 0.) continue;
1966             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1967           }
1968         }
1969       }
1970     }
1971     color = PETSC_DRAW_RED;
1972     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1973       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1974         y_l = A->rmap->N - row - 1.0;
1975         y_r = y_l + 1.0;
1976         x_l = a->j[j] * bs;
1977         x_r = x_l + 1.0;
1978         aa  = a->a + j * bs2;
1979         for (k = 0; k < bs; k++) {
1980           for (l = 0; l < bs; l++) {
1981             if (PetscRealPart(*aa++) <= 0.) continue;
1982             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1983           }
1984         }
1985       }
1986     }
1987     PetscDrawCollectiveEnd(draw);
1988   } else {
1989     /* use contour shading to indicate magnitude of values */
1990     /* first determine max of all nonzero values */
1991     PetscReal minv = 0.0, maxv = 0.0;
1992     PetscDraw popup;
1993 
1994     for (i = 0; i < a->nz * a->bs2; i++) {
1995       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1996     }
1997     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1998     PetscCall(PetscDrawGetPopup(draw, &popup));
1999     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
2000 
2001     PetscDrawCollectiveBegin(draw);
2002     for (i = 0, row = 0; i < mbs; i++, row += bs) {
2003       for (j = a->i[i]; j < a->i[i + 1]; j++) {
2004         y_l = A->rmap->N - row - 1.0;
2005         y_r = y_l + 1.0;
2006         x_l = a->j[j] * bs;
2007         x_r = x_l + 1.0;
2008         aa  = a->a + j * bs2;
2009         for (k = 0; k < bs; k++) {
2010           for (l = 0; l < bs; l++) {
2011             MatScalar v = *aa++;
2012             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
2013             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2014           }
2015         }
2016       }
2017     }
2018     PetscDrawCollectiveEnd(draw);
2019   }
2020   PetscFunctionReturn(PETSC_SUCCESS);
2021 }
2022 
2023 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2024 {
2025   PetscReal xl, yl, xr, yr, w, h;
2026   PetscDraw draw;
2027   PetscBool isnull;
2028 
2029   PetscFunctionBegin;
2030   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
2031   PetscCall(PetscDrawIsNull(draw, &isnull));
2032   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
2033 
2034   xr = A->cmap->n;
2035   yr = A->rmap->N;
2036   h  = yr / 10.0;
2037   w  = xr / 10.0;
2038   xr += w;
2039   yr += h;
2040   xl = -w;
2041   yl = -h;
2042   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
2043   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
2044   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
2045   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
2046   PetscCall(PetscDrawSave(draw));
2047   PetscFunctionReturn(PETSC_SUCCESS);
2048 }
2049 
2050 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2051 {
2052   PetscBool iascii, isbinary, isdraw;
2053 
2054   PetscFunctionBegin;
2055   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2056   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2057   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
2058   if (iascii) {
2059     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
2060   } else if (isbinary) {
2061     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
2062   } else if (isdraw) {
2063     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
2064   } else {
2065     Mat B;
2066     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2067     PetscCall(MatView(B, viewer));
2068     PetscCall(MatDestroy(&B));
2069   }
2070   PetscFunctionReturn(PETSC_SUCCESS);
2071 }
2072 
2073 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2074 {
2075   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2076   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2077   PetscInt    *ai = a->i, *ailen = a->ilen;
2078   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
2079   MatScalar   *ap, *aa = a->a;
2080 
2081   PetscFunctionBegin;
2082   for (k = 0; k < m; k++) { /* loop over rows */
2083     row  = im[k];
2084     brow = row / bs;
2085     if (row < 0) {
2086       v += n;
2087       continue;
2088     } /* negative row */
2089     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2090     rp   = PetscSafePointerPlusOffset(aj, ai[brow]);
2091     ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2092     nrow = ailen[brow];
2093     for (l = 0; l < n; l++) { /* loop over columns */
2094       if (in[l] < 0) {
2095         v++;
2096         continue;
2097       } /* negative column */
2098       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
2099       col  = in[l];
2100       bcol = col / bs;
2101       cidx = col % bs;
2102       ridx = row % bs;
2103       high = nrow;
2104       low  = 0; /* assume unsorted */
2105       while (high - low > 5) {
2106         t = (low + high) / 2;
2107         if (rp[t] > bcol) high = t;
2108         else low = t;
2109       }
2110       for (i = low; i < high; i++) {
2111         if (rp[i] > bcol) break;
2112         if (rp[i] == bcol) {
2113           *v++ = ap[bs2 * i + bs * cidx + ridx];
2114           goto finished;
2115         }
2116       }
2117       *v++ = 0.0;
2118     finished:;
2119     }
2120   }
2121   PetscFunctionReturn(PETSC_SUCCESS);
2122 }
2123 
2124 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2125 {
2126   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2127   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2128   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2129   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2130   PetscBool          roworiented = a->roworiented;
2131   const PetscScalar *value       = v;
2132   MatScalar         *ap = NULL, *aa = a->a, *bap;
2133 
2134   PetscFunctionBegin;
2135   if (roworiented) {
2136     stepval = (n - 1) * bs;
2137   } else {
2138     stepval = (m - 1) * bs;
2139   }
2140   for (k = 0; k < m; k++) { /* loop over added rows */
2141     row = im[k];
2142     if (row < 0) continue;
2143     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
2144     rp = aj + ai[row];
2145     if (!A->structure_only) ap = aa + bs2 * ai[row];
2146     rmax = imax[row];
2147     nrow = ailen[row];
2148     low  = 0;
2149     high = nrow;
2150     for (l = 0; l < n; l++) { /* loop over added columns */
2151       if (in[l] < 0) continue;
2152       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
2153       col = in[l];
2154       if (!A->structure_only) {
2155         if (roworiented) {
2156           value = v + (k * (stepval + bs) + l) * bs;
2157         } else {
2158           value = v + (l * (stepval + bs) + k) * bs;
2159         }
2160       }
2161       if (col <= lastcol) low = 0;
2162       else high = nrow;
2163       lastcol = col;
2164       while (high - low > 7) {
2165         t = (low + high) / 2;
2166         if (rp[t] > col) high = t;
2167         else low = t;
2168       }
2169       for (i = low; i < high; i++) {
2170         if (rp[i] > col) break;
2171         if (rp[i] == col) {
2172           if (A->structure_only) goto noinsert2;
2173           bap = ap + bs2 * i;
2174           if (roworiented) {
2175             if (is == ADD_VALUES) {
2176               for (ii = 0; ii < bs; ii++, value += stepval) {
2177                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2178               }
2179             } else {
2180               for (ii = 0; ii < bs; ii++, value += stepval) {
2181                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2182               }
2183             }
2184           } else {
2185             if (is == ADD_VALUES) {
2186               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2187                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
2188                 bap += bs;
2189               }
2190             } else {
2191               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2192                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
2193                 bap += bs;
2194               }
2195             }
2196           }
2197           goto noinsert2;
2198         }
2199       }
2200       if (nonew == 1) goto noinsert2;
2201       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2202       if (A->structure_only) {
2203         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
2204       } else {
2205         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2206       }
2207       N = nrow++ - 1;
2208       high++;
2209       /* shift up all the later entries in this row */
2210       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2211       rp[i] = col;
2212       if (!A->structure_only) {
2213         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2214         bap = ap + bs2 * i;
2215         if (roworiented) {
2216           for (ii = 0; ii < bs; ii++, value += stepval) {
2217             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2218           }
2219         } else {
2220           for (ii = 0; ii < bs; ii++, value += stepval) {
2221             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2222           }
2223         }
2224       }
2225     noinsert2:;
2226       low = i;
2227     }
2228     ailen[row] = nrow;
2229   }
2230   PetscFunctionReturn(PETSC_SUCCESS);
2231 }
2232 
2233 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2234 {
2235   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2236   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2237   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2238   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2239   MatScalar   *aa    = a->a, *ap;
2240   PetscReal    ratio = 0.6;
2241 
2242   PetscFunctionBegin;
2243   if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS);
2244 
2245   if (m) rmax = ailen[0];
2246   for (i = 1; i < mbs; i++) {
2247     /* move each row back by the amount of empty slots (fshift) before it*/
2248     fshift += imax[i - 1] - ailen[i - 1];
2249     rmax = PetscMax(rmax, ailen[i]);
2250     if (fshift) {
2251       ip = aj + ai[i];
2252       ap = aa + bs2 * ai[i];
2253       N  = ailen[i];
2254       PetscCall(PetscArraymove(ip - fshift, ip, N));
2255       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2256     }
2257     ai[i] = ai[i - 1] + ailen[i - 1];
2258   }
2259   if (mbs) {
2260     fshift += imax[mbs - 1] - ailen[mbs - 1];
2261     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2262   }
2263 
2264   /* reset ilen and imax for each row */
2265   a->nonzerorowcnt = 0;
2266   if (A->structure_only) {
2267     PetscCall(PetscFree2(a->imax, a->ilen));
2268   } else { /* !A->structure_only */
2269     for (i = 0; i < mbs; i++) {
2270       ailen[i] = imax[i] = ai[i + 1] - ai[i];
2271       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2272     }
2273   }
2274   a->nz = ai[mbs];
2275 
2276   /* diagonals may have moved, so kill the diagonal pointers */
2277   a->idiagvalid = PETSC_FALSE;
2278   if (fshift && a->diag) PetscCall(PetscFree(a->diag));
2279   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
2280   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
2281   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
2282   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
2283 
2284   A->info.mallocs += a->reallocs;
2285   a->reallocs         = 0;
2286   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2287   a->rmax             = rmax;
2288 
2289   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
2290   PetscFunctionReturn(PETSC_SUCCESS);
2291 }
2292 
2293 /*
2294    This function returns an array of flags which indicate the locations of contiguous
2295    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2296    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2297    Assume: sizes should be long enough to hold all the values.
2298 */
2299 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2300 {
2301   PetscInt j = 0;
2302 
2303   PetscFunctionBegin;
2304   for (PetscInt i = 0; i < n; j++) {
2305     PetscInt row = idx[i];
2306     if (row % bs != 0) { /* Not the beginning of a block */
2307       sizes[j] = 1;
2308       i++;
2309     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2310       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2311       i++;
2312     } else { /* Beginning of the block, so check if the complete block exists */
2313       PetscBool flg = PETSC_TRUE;
2314       for (PetscInt k = 1; k < bs; k++) {
2315         if (row + k != idx[i + k]) { /* break in the block */
2316           flg = PETSC_FALSE;
2317           break;
2318         }
2319       }
2320       if (flg) { /* No break in the bs */
2321         sizes[j] = bs;
2322         i += bs;
2323       } else {
2324         sizes[j] = 1;
2325         i++;
2326       }
2327     }
2328   }
2329   *bs_max = j;
2330   PetscFunctionReturn(PETSC_SUCCESS);
2331 }
2332 
2333 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2334 {
2335   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2336   PetscInt           i, j, k, count, *rows;
2337   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
2338   PetscScalar        zero = 0.0;
2339   MatScalar         *aa;
2340   const PetscScalar *xx;
2341   PetscScalar       *bb;
2342 
2343   PetscFunctionBegin;
2344   /* fix right-hand side if needed */
2345   if (x && b) {
2346     PetscCall(VecGetArrayRead(x, &xx));
2347     PetscCall(VecGetArray(b, &bb));
2348     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
2349     PetscCall(VecRestoreArrayRead(x, &xx));
2350     PetscCall(VecRestoreArray(b, &bb));
2351   }
2352 
2353   /* Make a copy of the IS and  sort it */
2354   /* allocate memory for rows,sizes */
2355   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2356 
2357   /* copy IS values to rows, and sort them */
2358   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
2359   PetscCall(PetscSortInt(is_n, rows));
2360 
2361   if (baij->keepnonzeropattern) {
2362     for (i = 0; i < is_n; i++) sizes[i] = 1;
2363     bs_max = is_n;
2364   } else {
2365     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2366     A->nonzerostate++;
2367   }
2368 
2369   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2370     row = rows[j];
2371     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2372     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2373     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2374     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2375       if (diag != (PetscScalar)0.0) {
2376         if (baij->ilen[row / bs] > 0) {
2377           baij->ilen[row / bs]       = 1;
2378           baij->j[baij->i[row / bs]] = row / bs;
2379 
2380           PetscCall(PetscArrayzero(aa, count * bs));
2381         }
2382         /* Now insert all the diagonal values for this bs */
2383         for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES);
2384       } else { /* (diag == 0.0) */
2385         baij->ilen[row / bs] = 0;
2386       } /* end (diag == 0.0) */
2387     } else { /* (sizes[i] != bs) */
2388       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2389       for (k = 0; k < count; k++) {
2390         aa[0] = zero;
2391         aa += bs;
2392       }
2393       if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES);
2394     }
2395   }
2396 
2397   PetscCall(PetscFree2(rows, sizes));
2398   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2399   PetscFunctionReturn(PETSC_SUCCESS);
2400 }
2401 
2402 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2403 {
2404   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2405   PetscInt           i, j, k, count;
2406   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
2407   PetscScalar        zero = 0.0;
2408   MatScalar         *aa;
2409   const PetscScalar *xx;
2410   PetscScalar       *bb;
2411   PetscBool         *zeroed, vecs = PETSC_FALSE;
2412 
2413   PetscFunctionBegin;
2414   /* fix right-hand side if needed */
2415   if (x && b) {
2416     PetscCall(VecGetArrayRead(x, &xx));
2417     PetscCall(VecGetArray(b, &bb));
2418     vecs = PETSC_TRUE;
2419   }
2420 
2421   /* zero the columns */
2422   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
2423   for (i = 0; i < is_n; i++) {
2424     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
2425     zeroed[is_idx[i]] = PETSC_TRUE;
2426   }
2427   for (i = 0; i < A->rmap->N; i++) {
2428     if (!zeroed[i]) {
2429       row = i / bs;
2430       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
2431         for (k = 0; k < bs; k++) {
2432           col = bs * baij->j[j] + k;
2433           if (zeroed[col]) {
2434             aa = baij->a + j * bs2 + (i % bs) + bs * k;
2435             if (vecs) bb[i] -= aa[0] * xx[col];
2436             aa[0] = 0.0;
2437           }
2438         }
2439       }
2440     } else if (vecs) bb[i] = diag * xx[i];
2441   }
2442   PetscCall(PetscFree(zeroed));
2443   if (vecs) {
2444     PetscCall(VecRestoreArrayRead(x, &xx));
2445     PetscCall(VecRestoreArray(b, &bb));
2446   }
2447 
2448   /* zero the rows */
2449   for (i = 0; i < is_n; i++) {
2450     row   = is_idx[i];
2451     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2452     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2453     for (k = 0; k < count; k++) {
2454       aa[0] = zero;
2455       aa += bs;
2456     }
2457     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
2458   }
2459   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2460   PetscFunctionReturn(PETSC_SUCCESS);
2461 }
2462 
2463 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2464 {
2465   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2466   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2467   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2468   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2469   PetscInt     ridx, cidx, bs2                 = a->bs2;
2470   PetscBool    roworiented = a->roworiented;
2471   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
2472 
2473   PetscFunctionBegin;
2474   for (k = 0; k < m; k++) { /* loop over added rows */
2475     row  = im[k];
2476     brow = row / bs;
2477     if (row < 0) continue;
2478     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
2479     rp = PetscSafePointerPlusOffset(aj, ai[brow]);
2480     if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2481     rmax = imax[brow];
2482     nrow = ailen[brow];
2483     low  = 0;
2484     high = nrow;
2485     for (l = 0; l < n; l++) { /* loop over added columns */
2486       if (in[l] < 0) continue;
2487       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
2488       col  = in[l];
2489       bcol = col / bs;
2490       ridx = row % bs;
2491       cidx = col % bs;
2492       if (!A->structure_only) {
2493         if (roworiented) {
2494           value = v[l + k * n];
2495         } else {
2496           value = v[k + l * m];
2497         }
2498       }
2499       if (col <= lastcol) low = 0;
2500       else high = nrow;
2501       lastcol = col;
2502       while (high - low > 7) {
2503         t = (low + high) / 2;
2504         if (rp[t] > bcol) high = t;
2505         else low = t;
2506       }
2507       for (i = low; i < high; i++) {
2508         if (rp[i] > bcol) break;
2509         if (rp[i] == bcol) {
2510           bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx);
2511           if (!A->structure_only) {
2512             if (is == ADD_VALUES) *bap += value;
2513             else *bap = value;
2514           }
2515           goto noinsert1;
2516         }
2517       }
2518       if (nonew == 1) goto noinsert1;
2519       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2520       if (A->structure_only) {
2521         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2522       } else {
2523         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2524       }
2525       N = nrow++ - 1;
2526       high++;
2527       /* shift up all the later entries in this row */
2528       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2529       rp[i] = bcol;
2530       if (!A->structure_only) {
2531         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2532         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2533         ap[bs2 * i + bs * cidx + ridx] = value;
2534       }
2535       a->nz++;
2536     noinsert1:;
2537       low = i;
2538     }
2539     ailen[brow] = nrow;
2540   }
2541   PetscFunctionReturn(PETSC_SUCCESS);
2542 }
2543 
2544 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2545 {
2546   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
2547   Mat          outA;
2548   PetscBool    row_identity, col_identity;
2549 
2550   PetscFunctionBegin;
2551   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
2552   PetscCall(ISIdentity(row, &row_identity));
2553   PetscCall(ISIdentity(col, &col_identity));
2554   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
2555 
2556   outA            = inA;
2557   inA->factortype = MAT_FACTOR_LU;
2558   PetscCall(PetscFree(inA->solvertype));
2559   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
2560 
2561   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2562 
2563   PetscCall(PetscObjectReference((PetscObject)row));
2564   PetscCall(ISDestroy(&a->row));
2565   a->row = row;
2566   PetscCall(PetscObjectReference((PetscObject)col));
2567   PetscCall(ISDestroy(&a->col));
2568   a->col = col;
2569 
2570   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2571   PetscCall(ISDestroy(&a->icol));
2572   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2573 
2574   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2575   if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
2576   PetscCall(MatLUFactorNumeric(outA, inA, info));
2577   PetscFunctionReturn(PETSC_SUCCESS);
2578 }
2579 
2580 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2581 {
2582   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2583 
2584   PetscFunctionBegin;
2585   baij->nz = baij->maxnz;
2586   PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2587   PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
2588   PetscFunctionReturn(PETSC_SUCCESS);
2589 }
2590 
2591 /*@
2592   MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix.
2593 
2594   Input Parameters:
2595 + mat     - the `MATSEQBAIJ` matrix
2596 - indices - the block column indices
2597 
2598   Level: advanced
2599 
2600   Notes:
2601   This can be called if you have precomputed the nonzero structure of the
2602   matrix and want to provide it to the matrix object to improve the performance
2603   of the `MatSetValues()` operation.
2604 
2605   You MUST have set the correct numbers of nonzeros per row in the call to
2606   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
2607 
2608   MUST be called before any calls to `MatSetValues()`
2609 
2610 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()`
2611 @*/
2612 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2613 {
2614   PetscFunctionBegin;
2615   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2616   PetscAssertPointer(indices, 2);
2617   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices));
2618   PetscFunctionReturn(PETSC_SUCCESS);
2619 }
2620 
2621 static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2622 {
2623   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2624   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2625   PetscReal    atmp;
2626   PetscScalar *x, zero = 0.0;
2627   MatScalar   *aa;
2628   PetscInt     ncols, brow, krow, kcol;
2629 
2630   PetscFunctionBegin;
2631   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2632   bs  = A->rmap->bs;
2633   aa  = a->a;
2634   ai  = a->i;
2635   aj  = a->j;
2636   mbs = a->mbs;
2637 
2638   PetscCall(VecSet(v, zero));
2639   PetscCall(VecGetArray(v, &x));
2640   PetscCall(VecGetLocalSize(v, &n));
2641   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2642   for (i = 0; i < mbs; i++) {
2643     ncols = ai[1] - ai[0];
2644     ai++;
2645     brow = bs * i;
2646     for (j = 0; j < ncols; j++) {
2647       for (kcol = 0; kcol < bs; kcol++) {
2648         for (krow = 0; krow < bs; krow++) {
2649           atmp = PetscAbsScalar(*aa);
2650           aa++;
2651           row = brow + krow; /* row index */
2652           if (PetscAbsScalar(x[row]) < atmp) {
2653             x[row] = atmp;
2654             if (idx) idx[row] = bs * (*aj) + kcol;
2655           }
2656         }
2657       }
2658       aj++;
2659     }
2660   }
2661   PetscCall(VecRestoreArray(v, &x));
2662   PetscFunctionReturn(PETSC_SUCCESS);
2663 }
2664 
2665 static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v)
2666 {
2667   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2668   PetscInt     i, j, n, row, bs, *ai, mbs;
2669   PetscReal    atmp;
2670   PetscScalar *x, zero = 0.0;
2671   MatScalar   *aa;
2672   PetscInt     ncols, brow, krow, kcol;
2673 
2674   PetscFunctionBegin;
2675   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2676   bs  = A->rmap->bs;
2677   aa  = a->a;
2678   ai  = a->i;
2679   mbs = a->mbs;
2680 
2681   PetscCall(VecSet(v, zero));
2682   PetscCall(VecGetArrayWrite(v, &x));
2683   PetscCall(VecGetLocalSize(v, &n));
2684   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2685   for (i = 0; i < mbs; i++) {
2686     ncols = ai[1] - ai[0];
2687     ai++;
2688     brow = bs * i;
2689     for (j = 0; j < ncols; j++) {
2690       for (kcol = 0; kcol < bs; kcol++) {
2691         for (krow = 0; krow < bs; krow++) {
2692           atmp = PetscAbsScalar(*aa);
2693           aa++;
2694           row = brow + krow; /* row index */
2695           x[row] += atmp;
2696         }
2697       }
2698     }
2699   }
2700   PetscCall(VecRestoreArrayWrite(v, &x));
2701   PetscFunctionReturn(PETSC_SUCCESS);
2702 }
2703 
2704 static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2705 {
2706   PetscFunctionBegin;
2707   /* If the two matrices have the same copy implementation, use fast copy. */
2708   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2709     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
2710     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2711     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
2712 
2713     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
2714     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
2715     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
2716     PetscCall(PetscObjectStateIncrease((PetscObject)B));
2717   } else {
2718     PetscCall(MatCopy_Basic(A, B, str));
2719   }
2720   PetscFunctionReturn(PETSC_SUCCESS);
2721 }
2722 
2723 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2724 {
2725   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2726 
2727   PetscFunctionBegin;
2728   *array = a->a;
2729   PetscFunctionReturn(PETSC_SUCCESS);
2730 }
2731 
2732 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2733 {
2734   PetscFunctionBegin;
2735   *array = NULL;
2736   PetscFunctionReturn(PETSC_SUCCESS);
2737 }
2738 
2739 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2740 {
2741   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
2742   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
2743   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
2744 
2745   PetscFunctionBegin;
2746   /* Set the number of nonzeros in the new matrix */
2747   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
2748   PetscFunctionReturn(PETSC_SUCCESS);
2749 }
2750 
2751 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2752 {
2753   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
2754   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2755   PetscBLASInt one = 1;
2756 
2757   PetscFunctionBegin;
2758   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2759     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2760     if (e) {
2761       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2762       if (e) {
2763         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2764         if (e) str = SAME_NONZERO_PATTERN;
2765       }
2766     }
2767     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2768   }
2769   if (str == SAME_NONZERO_PATTERN) {
2770     PetscScalar  alpha = a;
2771     PetscBLASInt bnz;
2772     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2773     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
2774     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2775   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2776     PetscCall(MatAXPY_Basic(Y, a, X, str));
2777   } else {
2778     Mat       B;
2779     PetscInt *nnz;
2780     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
2781     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
2782     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2783     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2784     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
2785     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
2786     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
2787     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
2788     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
2789     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2790     PetscCall(MatHeaderMerge(Y, &B));
2791     PetscCall(PetscFree(nnz));
2792   }
2793   PetscFunctionReturn(PETSC_SUCCESS);
2794 }
2795 
2796 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2797 {
2798 #if PetscDefined(USE_COMPLEX)
2799   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2800   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2801   MatScalar   *aa = a->a;
2802 
2803   PetscFunctionBegin;
2804   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
2805   PetscFunctionReturn(PETSC_SUCCESS);
2806 #else
2807   (void)A;
2808   return PETSC_SUCCESS;
2809 #endif
2810 }
2811 
2812 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2813 {
2814 #if PetscDefined(USE_COMPLEX)
2815   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2816   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2817   MatScalar   *aa = a->a;
2818 
2819   PetscFunctionBegin;
2820   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
2821   PetscFunctionReturn(PETSC_SUCCESS);
2822 #else
2823   (void)A;
2824   return PETSC_SUCCESS;
2825 #endif
2826 }
2827 
2828 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2829 {
2830 #if PetscDefined(USE_COMPLEX)
2831   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2832   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2833   MatScalar   *aa = a->a;
2834 
2835   PetscFunctionBegin;
2836   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2837   PetscFunctionReturn(PETSC_SUCCESS);
2838 #else
2839   (void)A;
2840   return PETSC_SUCCESS;
2841 #endif
2842 }
2843 
2844 /*
2845     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2846 */
2847 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2848 {
2849   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
2850   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
2851   PetscInt     nz = a->i[m], row, *jj, mr, col;
2852 
2853   PetscFunctionBegin;
2854   *nn = n;
2855   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2856   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
2857   PetscCall(PetscCalloc1(n, &collengths));
2858   PetscCall(PetscMalloc1(n + 1, &cia));
2859   PetscCall(PetscMalloc1(nz, &cja));
2860   jj = a->j;
2861   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2862   cia[0] = oshift;
2863   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2864   PetscCall(PetscArrayzero(collengths, n));
2865   jj = a->j;
2866   for (row = 0; row < m; row++) {
2867     mr = a->i[row + 1] - a->i[row];
2868     for (i = 0; i < mr; i++) {
2869       col = *jj++;
2870 
2871       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2872     }
2873   }
2874   PetscCall(PetscFree(collengths));
2875   *ia = cia;
2876   *ja = cja;
2877   PetscFunctionReturn(PETSC_SUCCESS);
2878 }
2879 
2880 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2881 {
2882   PetscFunctionBegin;
2883   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2884   PetscCall(PetscFree(*ia));
2885   PetscCall(PetscFree(*ja));
2886   PetscFunctionReturn(PETSC_SUCCESS);
2887 }
2888 
2889 /*
2890  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2891  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2892  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2893  */
2894 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2895 {
2896   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2897   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2898   PetscInt     nz = a->i[m], row, *jj, mr, col;
2899   PetscInt    *cspidx;
2900 
2901   PetscFunctionBegin;
2902   *nn = n;
2903   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2904 
2905   PetscCall(PetscCalloc1(n, &collengths));
2906   PetscCall(PetscMalloc1(n + 1, &cia));
2907   PetscCall(PetscMalloc1(nz, &cja));
2908   PetscCall(PetscMalloc1(nz, &cspidx));
2909   jj = a->j;
2910   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2911   cia[0] = oshift;
2912   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2913   PetscCall(PetscArrayzero(collengths, n));
2914   jj = a->j;
2915   for (row = 0; row < m; row++) {
2916     mr = a->i[row + 1] - a->i[row];
2917     for (i = 0; i < mr; i++) {
2918       col                                         = *jj++;
2919       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2920       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2921     }
2922   }
2923   PetscCall(PetscFree(collengths));
2924   *ia    = cia;
2925   *ja    = cja;
2926   *spidx = cspidx;
2927   PetscFunctionReturn(PETSC_SUCCESS);
2928 }
2929 
2930 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2931 {
2932   PetscFunctionBegin;
2933   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
2934   PetscCall(PetscFree(*spidx));
2935   PetscFunctionReturn(PETSC_SUCCESS);
2936 }
2937 
2938 static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2939 {
2940   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
2941 
2942   PetscFunctionBegin;
2943   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
2944   PetscCall(MatShift_Basic(Y, a));
2945   PetscFunctionReturn(PETSC_SUCCESS);
2946 }
2947 
2948 PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep)
2949 {
2950   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2951   PetscInt     fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k;
2952   PetscInt     m = A->rmap->N, *ailen = a->ilen;
2953   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2954   MatScalar   *aa = a->a, *ap;
2955   PetscBool    zero;
2956 
2957   PetscFunctionBegin;
2958   PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix");
2959   if (m) rmax = ailen[0];
2960   for (i = 1; i <= mbs; i++) {
2961     for (k = ai[i - 1]; k < ai[i]; k++) {
2962       zero = PETSC_TRUE;
2963       ap   = aa + bs2 * k;
2964       for (j = 0; j < bs2 && zero; j++) {
2965         if (ap[j] != 0.0) zero = PETSC_FALSE;
2966       }
2967       if (zero && (aj[k] != i - 1 || !keep)) fshift++;
2968       else {
2969         if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1));
2970         aj[k - fshift] = aj[k];
2971         PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2));
2972       }
2973     }
2974     ai[i - 1] -= fshift_prev;
2975     fshift_prev  = fshift;
2976     ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1];
2977     a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0);
2978     rmax = PetscMax(rmax, ailen[i - 1]);
2979   }
2980   if (fshift) {
2981     if (mbs) {
2982       ai[mbs] -= fshift;
2983       a->nz = ai[mbs];
2984     }
2985     PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz));
2986     A->nonzerostate++;
2987     A->info.nz_unneeded += (PetscReal)fshift;
2988     a->rmax = rmax;
2989     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
2990     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
2991   }
2992   PetscFunctionReturn(PETSC_SUCCESS);
2993 }
2994 
2995 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2996                                        MatGetRow_SeqBAIJ,
2997                                        MatRestoreRow_SeqBAIJ,
2998                                        MatMult_SeqBAIJ_N,
2999                                        /* 4*/ MatMultAdd_SeqBAIJ_N,
3000                                        MatMultTranspose_SeqBAIJ,
3001                                        MatMultTransposeAdd_SeqBAIJ,
3002                                        NULL,
3003                                        NULL,
3004                                        NULL,
3005                                        /* 10*/ NULL,
3006                                        MatLUFactor_SeqBAIJ,
3007                                        NULL,
3008                                        NULL,
3009                                        MatTranspose_SeqBAIJ,
3010                                        /* 15*/ MatGetInfo_SeqBAIJ,
3011                                        MatEqual_SeqBAIJ,
3012                                        MatGetDiagonal_SeqBAIJ,
3013                                        MatDiagonalScale_SeqBAIJ,
3014                                        MatNorm_SeqBAIJ,
3015                                        /* 20*/ NULL,
3016                                        MatAssemblyEnd_SeqBAIJ,
3017                                        MatSetOption_SeqBAIJ,
3018                                        MatZeroEntries_SeqBAIJ,
3019                                        /* 24*/ MatZeroRows_SeqBAIJ,
3020                                        NULL,
3021                                        NULL,
3022                                        NULL,
3023                                        NULL,
3024                                        /* 29*/ MatSetUp_Seq_Hash,
3025                                        NULL,
3026                                        NULL,
3027                                        NULL,
3028                                        NULL,
3029                                        /* 34*/ MatDuplicate_SeqBAIJ,
3030                                        NULL,
3031                                        NULL,
3032                                        MatILUFactor_SeqBAIJ,
3033                                        NULL,
3034                                        /* 39*/ MatAXPY_SeqBAIJ,
3035                                        MatCreateSubMatrices_SeqBAIJ,
3036                                        MatIncreaseOverlap_SeqBAIJ,
3037                                        MatGetValues_SeqBAIJ,
3038                                        MatCopy_SeqBAIJ,
3039                                        /* 44*/ NULL,
3040                                        MatScale_SeqBAIJ,
3041                                        MatShift_SeqBAIJ,
3042                                        NULL,
3043                                        MatZeroRowsColumns_SeqBAIJ,
3044                                        /* 49*/ NULL,
3045                                        MatGetRowIJ_SeqBAIJ,
3046                                        MatRestoreRowIJ_SeqBAIJ,
3047                                        MatGetColumnIJ_SeqBAIJ,
3048                                        MatRestoreColumnIJ_SeqBAIJ,
3049                                        /* 54*/ MatFDColoringCreate_SeqXAIJ,
3050                                        NULL,
3051                                        NULL,
3052                                        NULL,
3053                                        MatSetValuesBlocked_SeqBAIJ,
3054                                        /* 59*/ MatCreateSubMatrix_SeqBAIJ,
3055                                        MatDestroy_SeqBAIJ,
3056                                        MatView_SeqBAIJ,
3057                                        NULL,
3058                                        NULL,
3059                                        /* 64*/ NULL,
3060                                        NULL,
3061                                        NULL,
3062                                        NULL,
3063                                        NULL,
3064                                        /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
3065                                        NULL,
3066                                        MatConvert_Basic,
3067                                        NULL,
3068                                        NULL,
3069                                        /* 74*/ NULL,
3070                                        MatFDColoringApply_BAIJ,
3071                                        NULL,
3072                                        NULL,
3073                                        NULL,
3074                                        /* 79*/ NULL,
3075                                        NULL,
3076                                        NULL,
3077                                        NULL,
3078                                        MatLoad_SeqBAIJ,
3079                                        /* 84*/ NULL,
3080                                        NULL,
3081                                        NULL,
3082                                        NULL,
3083                                        NULL,
3084                                        /* 89*/ NULL,
3085                                        NULL,
3086                                        NULL,
3087                                        NULL,
3088                                        NULL,
3089                                        /* 94*/ NULL,
3090                                        NULL,
3091                                        NULL,
3092                                        NULL,
3093                                        NULL,
3094                                        /* 99*/ NULL,
3095                                        NULL,
3096                                        NULL,
3097                                        MatConjugate_SeqBAIJ,
3098                                        NULL,
3099                                        /*104*/ NULL,
3100                                        MatRealPart_SeqBAIJ,
3101                                        MatImaginaryPart_SeqBAIJ,
3102                                        NULL,
3103                                        NULL,
3104                                        /*109*/ NULL,
3105                                        NULL,
3106                                        NULL,
3107                                        NULL,
3108                                        MatMissingDiagonal_SeqBAIJ,
3109                                        /*114*/ NULL,
3110                                        NULL,
3111                                        NULL,
3112                                        NULL,
3113                                        NULL,
3114                                        /*119*/ NULL,
3115                                        NULL,
3116                                        MatMultHermitianTranspose_SeqBAIJ,
3117                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3118                                        NULL,
3119                                        /*124*/ NULL,
3120                                        MatGetColumnReductions_SeqBAIJ,
3121                                        MatInvertBlockDiagonal_SeqBAIJ,
3122                                        NULL,
3123                                        NULL,
3124                                        /*129*/ NULL,
3125                                        NULL,
3126                                        NULL,
3127                                        NULL,
3128                                        NULL,
3129                                        /*134*/ NULL,
3130                                        NULL,
3131                                        NULL,
3132                                        NULL,
3133                                        NULL,
3134                                        /*139*/ MatSetBlockSizes_Default,
3135                                        NULL,
3136                                        NULL,
3137                                        MatFDColoringSetUp_SeqXAIJ,
3138                                        NULL,
3139                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3140                                        MatDestroySubMatrices_SeqBAIJ,
3141                                        NULL,
3142                                        NULL,
3143                                        NULL,
3144                                        NULL,
3145                                        /*150*/ NULL,
3146                                        MatEliminateZeros_SeqBAIJ,
3147                                        MatGetRowSumAbs_SeqBAIJ,
3148                                        NULL,
3149                                        NULL,
3150                                        /*155*/ NULL,
3151                                        MatCopyHashToXAIJ_Seq_Hash};
3152 
3153 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3154 {
3155   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3156   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3157 
3158   PetscFunctionBegin;
3159   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3160 
3161   /* allocate space for values if not already there */
3162   if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
3163 
3164   /* copy values over */
3165   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
3166   PetscFunctionReturn(PETSC_SUCCESS);
3167 }
3168 
3169 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3170 {
3171   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3172   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3173 
3174   PetscFunctionBegin;
3175   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3176   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
3177 
3178   /* copy values over */
3179   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
3180   PetscFunctionReturn(PETSC_SUCCESS);
3181 }
3182 
3183 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3184 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3185 
3186 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3187 {
3188   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
3189   PetscInt     i, mbs, nbs, bs2;
3190   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3191 
3192   PetscFunctionBegin;
3193   if (B->hash_active) {
3194     PetscInt bs;
3195     B->ops[0] = b->cops;
3196     PetscCall(PetscHMapIJVDestroy(&b->ht));
3197     PetscCall(MatGetBlockSize(B, &bs));
3198     if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht));
3199     PetscCall(PetscFree(b->dnz));
3200     PetscCall(PetscFree(b->bdnz));
3201     B->hash_active = PETSC_FALSE;
3202   }
3203   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3204   if (nz == MAT_SKIP_ALLOCATION) {
3205     skipallocation = PETSC_TRUE;
3206     nz             = 0;
3207   }
3208 
3209   PetscCall(MatSetBlockSize(B, bs));
3210   PetscCall(PetscLayoutSetUp(B->rmap));
3211   PetscCall(PetscLayoutSetUp(B->cmap));
3212   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3213 
3214   B->preallocated = PETSC_TRUE;
3215 
3216   mbs = B->rmap->n / bs;
3217   nbs = B->cmap->n / bs;
3218   bs2 = bs * bs;
3219 
3220   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3221 
3222   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3223   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3224   if (nnz) {
3225     for (i = 0; i < mbs; i++) {
3226       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
3227       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3228     }
3229   }
3230 
3231   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
3232   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3233   PetscOptionsEnd();
3234 
3235   if (!flg) {
3236     switch (bs) {
3237     case 1:
3238       B->ops->mult    = MatMult_SeqBAIJ_1;
3239       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3240       break;
3241     case 2:
3242       B->ops->mult    = MatMult_SeqBAIJ_2;
3243       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3244       break;
3245     case 3:
3246       B->ops->mult    = MatMult_SeqBAIJ_3;
3247       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3248       break;
3249     case 4:
3250       B->ops->mult    = MatMult_SeqBAIJ_4;
3251       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3252       break;
3253     case 5:
3254       B->ops->mult    = MatMult_SeqBAIJ_5;
3255       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3256       break;
3257     case 6:
3258       B->ops->mult    = MatMult_SeqBAIJ_6;
3259       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3260       break;
3261     case 7:
3262       B->ops->mult    = MatMult_SeqBAIJ_7;
3263       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3264       break;
3265     case 9: {
3266       PetscInt version = 1;
3267       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3268       switch (version) {
3269 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3270       case 1:
3271         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
3272         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3273         PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3274         break;
3275 #endif
3276       default:
3277         B->ops->mult    = MatMult_SeqBAIJ_N;
3278         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3279         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3280         break;
3281       }
3282       break;
3283     }
3284     case 11:
3285       B->ops->mult    = MatMult_SeqBAIJ_11;
3286       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3287       break;
3288     case 12: {
3289       PetscInt version = 1;
3290       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3291       switch (version) {
3292       case 1:
3293         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
3294         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3295         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3296         break;
3297       case 2:
3298         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
3299         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3300         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3301         break;
3302 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3303       case 3:
3304         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
3305         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3306         PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3307         break;
3308 #endif
3309       default:
3310         B->ops->mult    = MatMult_SeqBAIJ_N;
3311         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3312         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3313         break;
3314       }
3315       break;
3316     }
3317     case 15: {
3318       PetscInt version = 1;
3319       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3320       switch (version) {
3321       case 1:
3322         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3323         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3324         break;
3325       case 2:
3326         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3327         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3328         break;
3329       case 3:
3330         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3331         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3332         break;
3333       case 4:
3334         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3335         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3336         break;
3337       default:
3338         B->ops->mult = MatMult_SeqBAIJ_N;
3339         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3340         break;
3341       }
3342       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3343       break;
3344     }
3345     default:
3346       B->ops->mult    = MatMult_SeqBAIJ_N;
3347       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3348       PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3349       break;
3350     }
3351   }
3352   B->ops->sor = MatSOR_SeqBAIJ;
3353   b->mbs      = mbs;
3354   b->nbs      = nbs;
3355   if (!skipallocation) {
3356     if (!b->imax) {
3357       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
3358 
3359       b->free_imax_ilen = PETSC_TRUE;
3360     }
3361     /* b->ilen will count nonzeros in each block row so far. */
3362     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3363     if (!nnz) {
3364       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3365       else if (nz < 0) nz = 1;
3366       nz = PetscMin(nz, nbs);
3367       for (i = 0; i < mbs; i++) b->imax[i] = nz;
3368       PetscCall(PetscIntMultError(nz, mbs, &nz));
3369     } else {
3370       PetscInt64 nz64 = 0;
3371       for (i = 0; i < mbs; i++) {
3372         b->imax[i] = nnz[i];
3373         nz64 += nnz[i];
3374       }
3375       PetscCall(PetscIntCast(nz64, &nz));
3376     }
3377 
3378     /* allocate the matrix space */
3379     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3380     PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j));
3381     PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i));
3382     if (B->structure_only) {
3383       b->free_a = PETSC_FALSE;
3384     } else {
3385       PetscInt nzbs2 = 0;
3386       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
3387       PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a));
3388       b->free_a = PETSC_TRUE;
3389       PetscCall(PetscArrayzero(b->a, nz * bs2));
3390     }
3391     b->free_ij = PETSC_TRUE;
3392     PetscCall(PetscArrayzero(b->j, nz));
3393 
3394     b->i[0] = 0;
3395     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3396   } else {
3397     b->free_a  = PETSC_FALSE;
3398     b->free_ij = PETSC_FALSE;
3399   }
3400 
3401   b->bs2              = bs2;
3402   b->mbs              = mbs;
3403   b->nz               = 0;
3404   b->maxnz            = nz;
3405   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3406   B->was_assembled    = PETSC_FALSE;
3407   B->assembled        = PETSC_FALSE;
3408   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3409   PetscFunctionReturn(PETSC_SUCCESS);
3410 }
3411 
3412 static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3413 {
3414   PetscInt     i, m, nz, nz_max = 0, *nnz;
3415   PetscScalar *values      = NULL;
3416   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3417 
3418   PetscFunctionBegin;
3419   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
3420   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
3421   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
3422   PetscCall(PetscLayoutSetUp(B->rmap));
3423   PetscCall(PetscLayoutSetUp(B->cmap));
3424   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3425   m = B->rmap->n / bs;
3426 
3427   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
3428   PetscCall(PetscMalloc1(m + 1, &nnz));
3429   for (i = 0; i < m; i++) {
3430     nz = ii[i + 1] - ii[i];
3431     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3432     nz_max = PetscMax(nz_max, nz);
3433     nnz[i] = nz;
3434   }
3435   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
3436   PetscCall(PetscFree(nnz));
3437 
3438   values = (PetscScalar *)V;
3439   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3440   for (i = 0; i < m; i++) {
3441     PetscInt        ncols = ii[i + 1] - ii[i];
3442     const PetscInt *icols = jj + ii[i];
3443     if (bs == 1 || !roworiented) {
3444       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
3445       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
3446     } else {
3447       PetscInt j;
3448       for (j = 0; j < ncols; j++) {
3449         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
3450         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
3451       }
3452     }
3453   }
3454   if (!V) PetscCall(PetscFree(values));
3455   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3456   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3457   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3458   PetscFunctionReturn(PETSC_SUCCESS);
3459 }
3460 
3461 /*@C
3462   MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3463 
3464   Not Collective
3465 
3466   Input Parameter:
3467 . A - a `MATSEQBAIJ` matrix
3468 
3469   Output Parameter:
3470 . array - pointer to the data
3471 
3472   Level: intermediate
3473 
3474 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3475 @*/
3476 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[])
3477 {
3478   PetscFunctionBegin;
3479   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
3480   PetscFunctionReturn(PETSC_SUCCESS);
3481 }
3482 
3483 /*@C
3484   MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3485 
3486   Not Collective
3487 
3488   Input Parameters:
3489 + A     - a `MATSEQBAIJ` matrix
3490 - array - pointer to the data
3491 
3492   Level: intermediate
3493 
3494 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3495 @*/
3496 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[])
3497 {
3498   PetscFunctionBegin;
3499   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
3500   PetscFunctionReturn(PETSC_SUCCESS);
3501 }
3502 
3503 /*MC
3504    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3505    block sparse compressed row format.
3506 
3507    Options Database Keys:
3508 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()`
3509 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3510 
3511    Level: beginner
3512 
3513    Notes:
3514    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
3515    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
3516 
3517    Run with `-info` to see what version of the matrix-vector product is being used
3518 
3519 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()`
3520 M*/
3521 
3522 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3523 
3524 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3525 {
3526   PetscMPIInt  size;
3527   Mat_SeqBAIJ *b;
3528 
3529   PetscFunctionBegin;
3530   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
3531   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3532 
3533   PetscCall(PetscNew(&b));
3534   B->data   = (void *)b;
3535   B->ops[0] = MatOps_Values;
3536 
3537   b->row          = NULL;
3538   b->col          = NULL;
3539   b->icol         = NULL;
3540   b->reallocs     = 0;
3541   b->saved_values = NULL;
3542 
3543   b->roworiented        = PETSC_TRUE;
3544   b->nonew              = 0;
3545   b->diag               = NULL;
3546   B->spptr              = NULL;
3547   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3548   b->keepnonzeropattern = PETSC_FALSE;
3549 
3550   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
3551   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
3552   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
3553   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
3554   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
3555   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
3556   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
3557   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
3558   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3559   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
3560 #if defined(PETSC_HAVE_HYPRE)
3561   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
3562 #endif
3563   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
3564   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
3565   PetscFunctionReturn(PETSC_SUCCESS);
3566 }
3567 
3568 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3569 {
3570   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3571   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3572 
3573   PetscFunctionBegin;
3574   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
3575   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
3576 
3577   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3578     c->imax           = a->imax;
3579     c->ilen           = a->ilen;
3580     c->free_imax_ilen = PETSC_FALSE;
3581   } else {
3582     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3583     for (i = 0; i < mbs; i++) {
3584       c->imax[i] = a->imax[i];
3585       c->ilen[i] = a->ilen[i];
3586     }
3587     c->free_imax_ilen = PETSC_TRUE;
3588   }
3589 
3590   /* allocate the matrix space */
3591   if (mallocmatspace) {
3592     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3593       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3594       PetscCall(PetscArrayzero(c->a, bs2 * nz));
3595       c->free_a       = PETSC_TRUE;
3596       c->i            = a->i;
3597       c->j            = a->j;
3598       c->free_ij      = PETSC_FALSE;
3599       c->parent       = A;
3600       C->preallocated = PETSC_TRUE;
3601       C->assembled    = PETSC_TRUE;
3602 
3603       PetscCall(PetscObjectReference((PetscObject)A));
3604       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3605       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3606     } else {
3607       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3608       PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j));
3609       PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i));
3610       c->free_a  = PETSC_TRUE;
3611       c->free_ij = PETSC_TRUE;
3612 
3613       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3614       if (mbs > 0) {
3615         PetscCall(PetscArraycpy(c->j, a->j, nz));
3616         if (cpvalues == MAT_COPY_VALUES) {
3617           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
3618         } else {
3619           PetscCall(PetscArrayzero(c->a, bs2 * nz));
3620         }
3621       }
3622       C->preallocated = PETSC_TRUE;
3623       C->assembled    = PETSC_TRUE;
3624     }
3625   }
3626 
3627   c->roworiented = a->roworiented;
3628   c->nonew       = a->nonew;
3629 
3630   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
3631   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
3632 
3633   c->bs2 = a->bs2;
3634   c->mbs = a->mbs;
3635   c->nbs = a->nbs;
3636 
3637   if (a->diag) {
3638     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3639       c->diag      = a->diag;
3640       c->free_diag = PETSC_FALSE;
3641     } else {
3642       PetscCall(PetscMalloc1(mbs + 1, &c->diag));
3643       for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i];
3644       c->free_diag = PETSC_TRUE;
3645     }
3646   } else c->diag = NULL;
3647 
3648   c->nz         = a->nz;
3649   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3650   c->solve_work = NULL;
3651   c->mult_work  = NULL;
3652   c->sor_workt  = NULL;
3653   c->sor_work   = NULL;
3654 
3655   c->compressedrow.use   = a->compressedrow.use;
3656   c->compressedrow.nrows = a->compressedrow.nrows;
3657   if (a->compressedrow.use) {
3658     i = a->compressedrow.nrows;
3659     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
3660     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
3661     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
3662   } else {
3663     c->compressedrow.use    = PETSC_FALSE;
3664     c->compressedrow.i      = NULL;
3665     c->compressedrow.rindex = NULL;
3666   }
3667   c->nonzerorowcnt = a->nonzerorowcnt;
3668   C->nonzerostate  = A->nonzerostate;
3669 
3670   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
3671   PetscFunctionReturn(PETSC_SUCCESS);
3672 }
3673 
3674 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3675 {
3676   PetscFunctionBegin;
3677   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
3678   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
3679   PetscCall(MatSetType(*B, MATSEQBAIJ));
3680   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
3681   PetscFunctionReturn(PETSC_SUCCESS);
3682 }
3683 
3684 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3685 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3686 {
3687   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3688   PetscInt    *rowidxs, *colidxs;
3689   PetscScalar *matvals;
3690 
3691   PetscFunctionBegin;
3692   PetscCall(PetscViewerSetUp(viewer));
3693 
3694   /* read matrix header */
3695   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3696   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3697   M  = header[1];
3698   N  = header[2];
3699   nz = header[3];
3700   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3701   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3702   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3703 
3704   /* set block sizes from the viewer's .info file */
3705   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3706   /* set local and global sizes if not set already */
3707   if (mat->rmap->n < 0) mat->rmap->n = M;
3708   if (mat->cmap->n < 0) mat->cmap->n = N;
3709   if (mat->rmap->N < 0) mat->rmap->N = M;
3710   if (mat->cmap->N < 0) mat->cmap->N = N;
3711   PetscCall(PetscLayoutSetUp(mat->rmap));
3712   PetscCall(PetscLayoutSetUp(mat->cmap));
3713 
3714   /* check if the matrix sizes are correct */
3715   PetscCall(MatGetSize(mat, &rows, &cols));
3716   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3717   PetscCall(MatGetBlockSize(mat, &bs));
3718   PetscCall(MatGetLocalSize(mat, &m, &n));
3719   mbs = m / bs;
3720   nbs = n / bs;
3721 
3722   /* read in row lengths, column indices and nonzero values */
3723   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3724   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
3725   rowidxs[0] = 0;
3726   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3727   sum = rowidxs[m];
3728   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3729 
3730   /* read in column indices and nonzero values */
3731   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
3732   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
3733   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3734 
3735   {               /* preallocate matrix storage */
3736     PetscBT   bt; /* helper bit set to count nonzeros */
3737     PetscInt *nnz;
3738     PetscBool sbaij;
3739 
3740     PetscCall(PetscBTCreate(nbs, &bt));
3741     PetscCall(PetscCalloc1(mbs, &nnz));
3742     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3743     for (i = 0; i < mbs; i++) {
3744       PetscCall(PetscBTMemzero(nbs, bt));
3745       for (k = 0; k < bs; k++) {
3746         PetscInt row = bs * i + k;
3747         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3748           PetscInt col = colidxs[j];
3749           if (!sbaij || col >= row)
3750             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3751         }
3752       }
3753     }
3754     PetscCall(PetscBTDestroy(&bt));
3755     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
3756     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
3757     PetscCall(PetscFree(nnz));
3758   }
3759 
3760   /* store matrix values */
3761   for (i = 0; i < m; i++) {
3762     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
3763     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3764   }
3765 
3766   PetscCall(PetscFree(rowidxs));
3767   PetscCall(PetscFree2(colidxs, matvals));
3768   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3769   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3770   PetscFunctionReturn(PETSC_SUCCESS);
3771 }
3772 
3773 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3774 {
3775   PetscBool isbinary;
3776 
3777   PetscFunctionBegin;
3778   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3779   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3780   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
3781   PetscFunctionReturn(PETSC_SUCCESS);
3782 }
3783 
3784 /*@
3785   MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3786   compressed row) format.  For good matrix assembly performance the
3787   user should preallocate the matrix storage by setting the parameter `nz`
3788   (or the array `nnz`).
3789 
3790   Collective
3791 
3792   Input Parameters:
3793 + comm - MPI communicator, set to `PETSC_COMM_SELF`
3794 . bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3795          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3796 . m    - number of rows
3797 . n    - number of columns
3798 . nz   - number of nonzero blocks  per block row (same for all rows)
3799 - nnz  - array containing the number of nonzero blocks in the various block rows
3800          (possibly different for each block row) or `NULL`
3801 
3802   Output Parameter:
3803 . A - the matrix
3804 
3805   Options Database Keys:
3806 + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3807 - -mat_block_size - size of the blocks to use
3808 
3809   Level: intermediate
3810 
3811   Notes:
3812   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3813   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3814   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
3815 
3816   The number of rows and columns must be divisible by blocksize.
3817 
3818   If the `nnz` parameter is given then the `nz` parameter is ignored
3819 
3820   A nonzero block is any block that as 1 or more nonzeros in it
3821 
3822   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3823   storage.  That is, the stored row and column indices can begin at
3824   either one (as in Fortran) or zero.
3825 
3826   Specify the preallocated storage with either `nz` or `nnz` (not both).
3827   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3828   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3829   matrices.
3830 
3831 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3832 @*/
3833 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3834 {
3835   PetscFunctionBegin;
3836   PetscCall(MatCreate(comm, A));
3837   PetscCall(MatSetSizes(*A, m, n, m, n));
3838   PetscCall(MatSetType(*A, MATSEQBAIJ));
3839   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
3840   PetscFunctionReturn(PETSC_SUCCESS);
3841 }
3842 
3843 /*@
3844   MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3845   per row in the matrix. For good matrix assembly performance the
3846   user should preallocate the matrix storage by setting the parameter `nz`
3847   (or the array `nnz`).
3848 
3849   Collective
3850 
3851   Input Parameters:
3852 + B   - the matrix
3853 . bs  - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3854         blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3855 . nz  - number of block nonzeros per block row (same for all rows)
3856 - nnz - array containing the number of block nonzeros in the various block rows
3857         (possibly different for each block row) or `NULL`
3858 
3859   Options Database Keys:
3860 + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3861 - -mat_block_size - size of the blocks to use
3862 
3863   Level: intermediate
3864 
3865   Notes:
3866   If the `nnz` parameter is given then the `nz` parameter is ignored
3867 
3868   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3869   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3870   You can also run with the option `-info` and look for messages with the string
3871   malloc in them to see if additional memory allocation was needed.
3872 
3873   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3874   storage.  That is, the stored row and column indices can begin at
3875   either one (as in Fortran) or zero.
3876 
3877   Specify the preallocated storage with either `nz` or `nnz` (not both).
3878   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3879   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3880 
3881 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3882 @*/
3883 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3884 {
3885   PetscFunctionBegin;
3886   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3887   PetscValidType(B, 1);
3888   PetscValidLogicalCollectiveInt(B, bs, 2);
3889   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
3890   PetscFunctionReturn(PETSC_SUCCESS);
3891 }
3892 
3893 /*@C
3894   MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3895 
3896   Collective
3897 
3898   Input Parameters:
3899 + B  - the matrix
3900 . bs - the blocksize
3901 . i  - the indices into `j` for the start of each local row (indices start with zero)
3902 . j  - the column indices for each local row (indices start with zero) these must be sorted for each row
3903 - v  - optional values in the matrix, use `NULL` if not provided
3904 
3905   Level: advanced
3906 
3907   Notes:
3908   The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()`
3909 
3910   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
3911   may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
3912   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3913   `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3914   block column and the second index is over columns within a block.
3915 
3916   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3917 
3918 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3919 @*/
3920 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3921 {
3922   PetscFunctionBegin;
3923   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3924   PetscValidType(B, 1);
3925   PetscValidLogicalCollectiveInt(B, bs, 2);
3926   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
3927   PetscFunctionReturn(PETSC_SUCCESS);
3928 }
3929 
3930 /*@
3931   MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3932 
3933   Collective
3934 
3935   Input Parameters:
3936 + comm - must be an MPI communicator of size 1
3937 . bs   - size of block
3938 . m    - number of rows
3939 . n    - number of columns
3940 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3941 . j    - column indices
3942 - a    - matrix values
3943 
3944   Output Parameter:
3945 . mat - the matrix
3946 
3947   Level: advanced
3948 
3949   Notes:
3950   The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
3951   once the matrix is destroyed
3952 
3953   You cannot set new nonzero locations into this matrix, that will generate an error.
3954 
3955   The `i` and `j` indices are 0 based
3956 
3957   When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3958 
3959   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3960   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3961   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3962   with column-major ordering within blocks.
3963 
3964 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3965 @*/
3966 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3967 {
3968   Mat_SeqBAIJ *baij;
3969 
3970   PetscFunctionBegin;
3971   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
3972   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3973 
3974   PetscCall(MatCreate(comm, mat));
3975   PetscCall(MatSetSizes(*mat, m, n, m, n));
3976   PetscCall(MatSetType(*mat, MATSEQBAIJ));
3977   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3978   baij = (Mat_SeqBAIJ *)(*mat)->data;
3979   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3980 
3981   baij->i = i;
3982   baij->j = j;
3983   baij->a = a;
3984 
3985   baij->nonew          = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3986   baij->free_a         = PETSC_FALSE;
3987   baij->free_ij        = PETSC_FALSE;
3988   baij->free_imax_ilen = PETSC_TRUE;
3989 
3990   for (PetscInt ii = 0; ii < m; ii++) {
3991     const PetscInt row_len = i[ii + 1] - i[ii];
3992 
3993     baij->ilen[ii] = baij->imax[ii] = row_len;
3994     PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
3995   }
3996   if (PetscDefined(USE_DEBUG)) {
3997     for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
3998       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3999       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
4000     }
4001   }
4002 
4003   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
4004   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
4005   PetscFunctionReturn(PETSC_SUCCESS);
4006 }
4007 
4008 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4009 {
4010   PetscFunctionBegin;
4011   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
4012   PetscFunctionReturn(PETSC_SUCCESS);
4013 }
4014