xref: /petsc/src/mat/impls/baij/seq/baij.c (revision bcd3bd92eda2d5998e2f14c4bbfb33bd936bdc3e)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */
12 #define TYPE BAIJ
13 #define TYPE_BS
14 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
15 #undef TYPE_BS
16 #define TYPE_BS _BS
17 #define TYPE_BS_ON
18 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
19 #undef TYPE_BS
20 #include "../src/mat/impls/aij/seq/seqhashmat.h"
21 #undef TYPE
22 #undef TYPE_BS_ON
23 
24 #if defined(PETSC_HAVE_HYPRE)
25 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
26 #endif
27 
28 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
29 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
30 #endif
31 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
32 
33 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
34 {
35   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
36   PetscInt     m, n, ib, jb, bs = A->rmap->bs;
37   MatScalar   *a_val = a_aij->a;
38 
39   PetscFunctionBegin;
40   PetscCall(MatGetSize(A, &m, &n));
41   PetscCall(PetscArrayzero(reductions, n));
42   if (type == NORM_2) {
43     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
44       for (jb = 0; jb < bs; jb++) {
45         for (ib = 0; ib < bs; ib++) {
46           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
47           a_val++;
48         }
49       }
50     }
51   } else if (type == NORM_1) {
52     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
53       for (jb = 0; jb < bs; jb++) {
54         for (ib = 0; ib < bs; ib++) {
55           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
56           a_val++;
57         }
58       }
59     }
60   } else if (type == NORM_INFINITY) {
61     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
62       for (jb = 0; jb < bs; jb++) {
63         for (ib = 0; ib < bs; ib++) {
64           int col         = A->cmap->rstart + a_aij->j[i] * bs + jb;
65           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
66           a_val++;
67         }
68       }
69     }
70   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
71     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
72       for (jb = 0; jb < bs; jb++) {
73         for (ib = 0; ib < bs; ib++) {
74           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
75           a_val++;
76         }
77       }
78     }
79   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
80     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
81       for (jb = 0; jb < bs; jb++) {
82         for (ib = 0; ib < bs; ib++) {
83           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
84           a_val++;
85         }
86       }
87     }
88   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
89   if (type == NORM_2) {
90     for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
91   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
92     for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
93   }
94   PetscFunctionReturn(PETSC_SUCCESS);
95 }
96 
97 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
98 {
99   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
100   PetscInt    *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
101   MatScalar   *v     = a->a, *odiag, *diag, work[25], *v_work;
102   PetscReal    shift = 0.0;
103   PetscBool    allowzeropivot, zeropivotdetected = PETSC_FALSE;
104 
105   PetscFunctionBegin;
106   allowzeropivot = PetscNot(A->erroriffailure);
107 
108   if (a->idiagvalid) {
109     if (values) *values = a->idiag;
110     PetscFunctionReturn(PETSC_SUCCESS);
111   }
112   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
113   diag_offset = a->diag;
114   if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); }
115   diag = a->idiag;
116   if (values) *values = a->idiag;
117   /* factor and invert each block */
118   switch (bs) {
119   case 1:
120     for (i = 0; i < mbs; i++) {
121       odiag   = v + 1 * diag_offset[i];
122       diag[0] = odiag[0];
123 
124       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
125         if (allowzeropivot) {
126           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
127           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
128           A->factorerror_zeropivot_row   = i;
129           PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
130         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
131       }
132 
133       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
134       diag += 1;
135     }
136     break;
137   case 2:
138     for (i = 0; i < mbs; i++) {
139       odiag   = v + 4 * diag_offset[i];
140       diag[0] = odiag[0];
141       diag[1] = odiag[1];
142       diag[2] = odiag[2];
143       diag[3] = odiag[3];
144       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
145       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
146       diag += 4;
147     }
148     break;
149   case 3:
150     for (i = 0; i < mbs; i++) {
151       odiag   = v + 9 * diag_offset[i];
152       diag[0] = odiag[0];
153       diag[1] = odiag[1];
154       diag[2] = odiag[2];
155       diag[3] = odiag[3];
156       diag[4] = odiag[4];
157       diag[5] = odiag[5];
158       diag[6] = odiag[6];
159       diag[7] = odiag[7];
160       diag[8] = odiag[8];
161       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
162       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
163       diag += 9;
164     }
165     break;
166   case 4:
167     for (i = 0; i < mbs; i++) {
168       odiag = v + 16 * diag_offset[i];
169       PetscCall(PetscArraycpy(diag, odiag, 16));
170       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
171       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
172       diag += 16;
173     }
174     break;
175   case 5:
176     for (i = 0; i < mbs; i++) {
177       odiag = v + 25 * diag_offset[i];
178       PetscCall(PetscArraycpy(diag, odiag, 25));
179       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
180       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
181       diag += 25;
182     }
183     break;
184   case 6:
185     for (i = 0; i < mbs; i++) {
186       odiag = v + 36 * diag_offset[i];
187       PetscCall(PetscArraycpy(diag, odiag, 36));
188       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
189       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
190       diag += 36;
191     }
192     break;
193   case 7:
194     for (i = 0; i < mbs; i++) {
195       odiag = v + 49 * diag_offset[i];
196       PetscCall(PetscArraycpy(diag, odiag, 49));
197       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
198       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
199       diag += 49;
200     }
201     break;
202   default:
203     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
204     for (i = 0; i < mbs; i++) {
205       odiag = v + bs2 * diag_offset[i];
206       PetscCall(PetscArraycpy(diag, odiag, bs2));
207       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
208       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
209       diag += bs2;
210     }
211     PetscCall(PetscFree2(v_work, v_pivots));
212   }
213   a->idiagvalid = PETSC_TRUE;
214   PetscFunctionReturn(PETSC_SUCCESS);
215 }
216 
217 PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
218 {
219   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
220   PetscScalar       *x, *work, *w, *workt, *t;
221   const MatScalar   *v, *aa = a->a, *idiag;
222   const PetscScalar *b, *xb;
223   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
224   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
225   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
226 
227   PetscFunctionBegin;
228   its = its * lits;
229   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
230   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
231   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
232   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
233   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
234 
235   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
236 
237   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
238   diag  = a->diag;
239   idiag = a->idiag;
240   k     = PetscMax(A->rmap->n, A->cmap->n);
241   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
242   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
243   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
244   work = a->mult_work;
245   t    = a->sor_workt;
246   w    = a->sor_work;
247 
248   PetscCall(VecGetArray(xx, &x));
249   PetscCall(VecGetArrayRead(bb, &b));
250 
251   if (flag & SOR_ZERO_INITIAL_GUESS) {
252     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
253       switch (bs) {
254       case 1:
255         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
256         t[0] = b[0];
257         i2   = 1;
258         idiag += 1;
259         for (i = 1; i < m; i++) {
260           v    = aa + ai[i];
261           vi   = aj + ai[i];
262           nz   = diag[i] - ai[i];
263           s[0] = b[i2];
264           for (j = 0; j < nz; j++) {
265             xw[0] = x[vi[j]];
266             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
267           }
268           t[i2] = s[0];
269           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
270           x[i2] = xw[0];
271           idiag += 1;
272           i2 += 1;
273         }
274         break;
275       case 2:
276         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
277         t[0] = b[0];
278         t[1] = b[1];
279         i2   = 2;
280         idiag += 4;
281         for (i = 1; i < m; i++) {
282           v    = aa + 4 * ai[i];
283           vi   = aj + ai[i];
284           nz   = diag[i] - ai[i];
285           s[0] = b[i2];
286           s[1] = b[i2 + 1];
287           for (j = 0; j < nz; j++) {
288             idx   = 2 * vi[j];
289             it    = 4 * j;
290             xw[0] = x[idx];
291             xw[1] = x[1 + idx];
292             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
293           }
294           t[i2]     = s[0];
295           t[i2 + 1] = s[1];
296           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
297           x[i2]     = xw[0];
298           x[i2 + 1] = xw[1];
299           idiag += 4;
300           i2 += 2;
301         }
302         break;
303       case 3:
304         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
305         t[0] = b[0];
306         t[1] = b[1];
307         t[2] = b[2];
308         i2   = 3;
309         idiag += 9;
310         for (i = 1; i < m; i++) {
311           v    = aa + 9 * ai[i];
312           vi   = aj + ai[i];
313           nz   = diag[i] - ai[i];
314           s[0] = b[i2];
315           s[1] = b[i2 + 1];
316           s[2] = b[i2 + 2];
317           while (nz--) {
318             idx   = 3 * (*vi++);
319             xw[0] = x[idx];
320             xw[1] = x[1 + idx];
321             xw[2] = x[2 + idx];
322             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
323             v += 9;
324           }
325           t[i2]     = s[0];
326           t[i2 + 1] = s[1];
327           t[i2 + 2] = s[2];
328           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
329           x[i2]     = xw[0];
330           x[i2 + 1] = xw[1];
331           x[i2 + 2] = xw[2];
332           idiag += 9;
333           i2 += 3;
334         }
335         break;
336       case 4:
337         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
338         t[0] = b[0];
339         t[1] = b[1];
340         t[2] = b[2];
341         t[3] = b[3];
342         i2   = 4;
343         idiag += 16;
344         for (i = 1; i < m; i++) {
345           v    = aa + 16 * ai[i];
346           vi   = aj + ai[i];
347           nz   = diag[i] - ai[i];
348           s[0] = b[i2];
349           s[1] = b[i2 + 1];
350           s[2] = b[i2 + 2];
351           s[3] = b[i2 + 3];
352           while (nz--) {
353             idx   = 4 * (*vi++);
354             xw[0] = x[idx];
355             xw[1] = x[1 + idx];
356             xw[2] = x[2 + idx];
357             xw[3] = x[3 + idx];
358             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
359             v += 16;
360           }
361           t[i2]     = s[0];
362           t[i2 + 1] = s[1];
363           t[i2 + 2] = s[2];
364           t[i2 + 3] = s[3];
365           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
366           x[i2]     = xw[0];
367           x[i2 + 1] = xw[1];
368           x[i2 + 2] = xw[2];
369           x[i2 + 3] = xw[3];
370           idiag += 16;
371           i2 += 4;
372         }
373         break;
374       case 5:
375         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
376         t[0] = b[0];
377         t[1] = b[1];
378         t[2] = b[2];
379         t[3] = b[3];
380         t[4] = b[4];
381         i2   = 5;
382         idiag += 25;
383         for (i = 1; i < m; i++) {
384           v    = aa + 25 * ai[i];
385           vi   = aj + ai[i];
386           nz   = diag[i] - ai[i];
387           s[0] = b[i2];
388           s[1] = b[i2 + 1];
389           s[2] = b[i2 + 2];
390           s[3] = b[i2 + 3];
391           s[4] = b[i2 + 4];
392           while (nz--) {
393             idx   = 5 * (*vi++);
394             xw[0] = x[idx];
395             xw[1] = x[1 + idx];
396             xw[2] = x[2 + idx];
397             xw[3] = x[3 + idx];
398             xw[4] = x[4 + idx];
399             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
400             v += 25;
401           }
402           t[i2]     = s[0];
403           t[i2 + 1] = s[1];
404           t[i2 + 2] = s[2];
405           t[i2 + 3] = s[3];
406           t[i2 + 4] = s[4];
407           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
408           x[i2]     = xw[0];
409           x[i2 + 1] = xw[1];
410           x[i2 + 2] = xw[2];
411           x[i2 + 3] = xw[3];
412           x[i2 + 4] = xw[4];
413           idiag += 25;
414           i2 += 5;
415         }
416         break;
417       case 6:
418         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
419         t[0] = b[0];
420         t[1] = b[1];
421         t[2] = b[2];
422         t[3] = b[3];
423         t[4] = b[4];
424         t[5] = b[5];
425         i2   = 6;
426         idiag += 36;
427         for (i = 1; i < m; i++) {
428           v    = aa + 36 * ai[i];
429           vi   = aj + ai[i];
430           nz   = diag[i] - ai[i];
431           s[0] = b[i2];
432           s[1] = b[i2 + 1];
433           s[2] = b[i2 + 2];
434           s[3] = b[i2 + 3];
435           s[4] = b[i2 + 4];
436           s[5] = b[i2 + 5];
437           while (nz--) {
438             idx   = 6 * (*vi++);
439             xw[0] = x[idx];
440             xw[1] = x[1 + idx];
441             xw[2] = x[2 + idx];
442             xw[3] = x[3 + idx];
443             xw[4] = x[4 + idx];
444             xw[5] = x[5 + idx];
445             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
446             v += 36;
447           }
448           t[i2]     = s[0];
449           t[i2 + 1] = s[1];
450           t[i2 + 2] = s[2];
451           t[i2 + 3] = s[3];
452           t[i2 + 4] = s[4];
453           t[i2 + 5] = s[5];
454           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
455           x[i2]     = xw[0];
456           x[i2 + 1] = xw[1];
457           x[i2 + 2] = xw[2];
458           x[i2 + 3] = xw[3];
459           x[i2 + 4] = xw[4];
460           x[i2 + 5] = xw[5];
461           idiag += 36;
462           i2 += 6;
463         }
464         break;
465       case 7:
466         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
467         t[0] = b[0];
468         t[1] = b[1];
469         t[2] = b[2];
470         t[3] = b[3];
471         t[4] = b[4];
472         t[5] = b[5];
473         t[6] = b[6];
474         i2   = 7;
475         idiag += 49;
476         for (i = 1; i < m; i++) {
477           v    = aa + 49 * ai[i];
478           vi   = aj + ai[i];
479           nz   = diag[i] - ai[i];
480           s[0] = b[i2];
481           s[1] = b[i2 + 1];
482           s[2] = b[i2 + 2];
483           s[3] = b[i2 + 3];
484           s[4] = b[i2 + 4];
485           s[5] = b[i2 + 5];
486           s[6] = b[i2 + 6];
487           while (nz--) {
488             idx   = 7 * (*vi++);
489             xw[0] = x[idx];
490             xw[1] = x[1 + idx];
491             xw[2] = x[2 + idx];
492             xw[3] = x[3 + idx];
493             xw[4] = x[4 + idx];
494             xw[5] = x[5 + idx];
495             xw[6] = x[6 + idx];
496             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
497             v += 49;
498           }
499           t[i2]     = s[0];
500           t[i2 + 1] = s[1];
501           t[i2 + 2] = s[2];
502           t[i2 + 3] = s[3];
503           t[i2 + 4] = s[4];
504           t[i2 + 5] = s[5];
505           t[i2 + 6] = s[6];
506           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
507           x[i2]     = xw[0];
508           x[i2 + 1] = xw[1];
509           x[i2 + 2] = xw[2];
510           x[i2 + 3] = xw[3];
511           x[i2 + 4] = xw[4];
512           x[i2 + 5] = xw[5];
513           x[i2 + 6] = xw[6];
514           idiag += 49;
515           i2 += 7;
516         }
517         break;
518       default:
519         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
520         PetscCall(PetscArraycpy(t, b, bs));
521         i2 = bs;
522         idiag += bs2;
523         for (i = 1; i < m; i++) {
524           v  = aa + bs2 * ai[i];
525           vi = aj + ai[i];
526           nz = diag[i] - ai[i];
527 
528           PetscCall(PetscArraycpy(w, b + i2, bs));
529           /* copy all rows of x that are needed into contiguous space */
530           workt = work;
531           for (j = 0; j < nz; j++) {
532             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
533             workt += bs;
534           }
535           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
536           PetscCall(PetscArraycpy(t + i2, w, bs));
537           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
538 
539           idiag += bs2;
540           i2 += bs;
541         }
542         break;
543       }
544       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
545       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
546       xb = t;
547     } else xb = b;
548     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
549       idiag = a->idiag + bs2 * (a->mbs - 1);
550       i2    = bs * (m - 1);
551       switch (bs) {
552       case 1:
553         s[0] = xb[i2];
554         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
555         x[i2] = xw[0];
556         i2 -= 1;
557         for (i = m - 2; i >= 0; i--) {
558           v    = aa + (diag[i] + 1);
559           vi   = aj + diag[i] + 1;
560           nz   = ai[i + 1] - diag[i] - 1;
561           s[0] = xb[i2];
562           for (j = 0; j < nz; j++) {
563             xw[0] = x[vi[j]];
564             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
565           }
566           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
567           x[i2] = xw[0];
568           idiag -= 1;
569           i2 -= 1;
570         }
571         break;
572       case 2:
573         s[0] = xb[i2];
574         s[1] = xb[i2 + 1];
575         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
576         x[i2]     = xw[0];
577         x[i2 + 1] = xw[1];
578         i2 -= 2;
579         idiag -= 4;
580         for (i = m - 2; i >= 0; i--) {
581           v    = aa + 4 * (diag[i] + 1);
582           vi   = aj + diag[i] + 1;
583           nz   = ai[i + 1] - diag[i] - 1;
584           s[0] = xb[i2];
585           s[1] = xb[i2 + 1];
586           for (j = 0; j < nz; j++) {
587             idx   = 2 * vi[j];
588             it    = 4 * j;
589             xw[0] = x[idx];
590             xw[1] = x[1 + idx];
591             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
592           }
593           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
594           x[i2]     = xw[0];
595           x[i2 + 1] = xw[1];
596           idiag -= 4;
597           i2 -= 2;
598         }
599         break;
600       case 3:
601         s[0] = xb[i2];
602         s[1] = xb[i2 + 1];
603         s[2] = xb[i2 + 2];
604         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
605         x[i2]     = xw[0];
606         x[i2 + 1] = xw[1];
607         x[i2 + 2] = xw[2];
608         i2 -= 3;
609         idiag -= 9;
610         for (i = m - 2; i >= 0; i--) {
611           v    = aa + 9 * (diag[i] + 1);
612           vi   = aj + diag[i] + 1;
613           nz   = ai[i + 1] - diag[i] - 1;
614           s[0] = xb[i2];
615           s[1] = xb[i2 + 1];
616           s[2] = xb[i2 + 2];
617           while (nz--) {
618             idx   = 3 * (*vi++);
619             xw[0] = x[idx];
620             xw[1] = x[1 + idx];
621             xw[2] = x[2 + idx];
622             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
623             v += 9;
624           }
625           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
626           x[i2]     = xw[0];
627           x[i2 + 1] = xw[1];
628           x[i2 + 2] = xw[2];
629           idiag -= 9;
630           i2 -= 3;
631         }
632         break;
633       case 4:
634         s[0] = xb[i2];
635         s[1] = xb[i2 + 1];
636         s[2] = xb[i2 + 2];
637         s[3] = xb[i2 + 3];
638         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
639         x[i2]     = xw[0];
640         x[i2 + 1] = xw[1];
641         x[i2 + 2] = xw[2];
642         x[i2 + 3] = xw[3];
643         i2 -= 4;
644         idiag -= 16;
645         for (i = m - 2; i >= 0; i--) {
646           v    = aa + 16 * (diag[i] + 1);
647           vi   = aj + diag[i] + 1;
648           nz   = ai[i + 1] - diag[i] - 1;
649           s[0] = xb[i2];
650           s[1] = xb[i2 + 1];
651           s[2] = xb[i2 + 2];
652           s[3] = xb[i2 + 3];
653           while (nz--) {
654             idx   = 4 * (*vi++);
655             xw[0] = x[idx];
656             xw[1] = x[1 + idx];
657             xw[2] = x[2 + idx];
658             xw[3] = x[3 + idx];
659             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
660             v += 16;
661           }
662           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
663           x[i2]     = xw[0];
664           x[i2 + 1] = xw[1];
665           x[i2 + 2] = xw[2];
666           x[i2 + 3] = xw[3];
667           idiag -= 16;
668           i2 -= 4;
669         }
670         break;
671       case 5:
672         s[0] = xb[i2];
673         s[1] = xb[i2 + 1];
674         s[2] = xb[i2 + 2];
675         s[3] = xb[i2 + 3];
676         s[4] = xb[i2 + 4];
677         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
678         x[i2]     = xw[0];
679         x[i2 + 1] = xw[1];
680         x[i2 + 2] = xw[2];
681         x[i2 + 3] = xw[3];
682         x[i2 + 4] = xw[4];
683         i2 -= 5;
684         idiag -= 25;
685         for (i = m - 2; i >= 0; i--) {
686           v    = aa + 25 * (diag[i] + 1);
687           vi   = aj + diag[i] + 1;
688           nz   = ai[i + 1] - diag[i] - 1;
689           s[0] = xb[i2];
690           s[1] = xb[i2 + 1];
691           s[2] = xb[i2 + 2];
692           s[3] = xb[i2 + 3];
693           s[4] = xb[i2 + 4];
694           while (nz--) {
695             idx   = 5 * (*vi++);
696             xw[0] = x[idx];
697             xw[1] = x[1 + idx];
698             xw[2] = x[2 + idx];
699             xw[3] = x[3 + idx];
700             xw[4] = x[4 + idx];
701             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
702             v += 25;
703           }
704           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
705           x[i2]     = xw[0];
706           x[i2 + 1] = xw[1];
707           x[i2 + 2] = xw[2];
708           x[i2 + 3] = xw[3];
709           x[i2 + 4] = xw[4];
710           idiag -= 25;
711           i2 -= 5;
712         }
713         break;
714       case 6:
715         s[0] = xb[i2];
716         s[1] = xb[i2 + 1];
717         s[2] = xb[i2 + 2];
718         s[3] = xb[i2 + 3];
719         s[4] = xb[i2 + 4];
720         s[5] = xb[i2 + 5];
721         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
722         x[i2]     = xw[0];
723         x[i2 + 1] = xw[1];
724         x[i2 + 2] = xw[2];
725         x[i2 + 3] = xw[3];
726         x[i2 + 4] = xw[4];
727         x[i2 + 5] = xw[5];
728         i2 -= 6;
729         idiag -= 36;
730         for (i = m - 2; i >= 0; i--) {
731           v    = aa + 36 * (diag[i] + 1);
732           vi   = aj + diag[i] + 1;
733           nz   = ai[i + 1] - diag[i] - 1;
734           s[0] = xb[i2];
735           s[1] = xb[i2 + 1];
736           s[2] = xb[i2 + 2];
737           s[3] = xb[i2 + 3];
738           s[4] = xb[i2 + 4];
739           s[5] = xb[i2 + 5];
740           while (nz--) {
741             idx   = 6 * (*vi++);
742             xw[0] = x[idx];
743             xw[1] = x[1 + idx];
744             xw[2] = x[2 + idx];
745             xw[3] = x[3 + idx];
746             xw[4] = x[4 + idx];
747             xw[5] = x[5 + idx];
748             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
749             v += 36;
750           }
751           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
752           x[i2]     = xw[0];
753           x[i2 + 1] = xw[1];
754           x[i2 + 2] = xw[2];
755           x[i2 + 3] = xw[3];
756           x[i2 + 4] = xw[4];
757           x[i2 + 5] = xw[5];
758           idiag -= 36;
759           i2 -= 6;
760         }
761         break;
762       case 7:
763         s[0] = xb[i2];
764         s[1] = xb[i2 + 1];
765         s[2] = xb[i2 + 2];
766         s[3] = xb[i2 + 3];
767         s[4] = xb[i2 + 4];
768         s[5] = xb[i2 + 5];
769         s[6] = xb[i2 + 6];
770         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
771         x[i2]     = xw[0];
772         x[i2 + 1] = xw[1];
773         x[i2 + 2] = xw[2];
774         x[i2 + 3] = xw[3];
775         x[i2 + 4] = xw[4];
776         x[i2 + 5] = xw[5];
777         x[i2 + 6] = xw[6];
778         i2 -= 7;
779         idiag -= 49;
780         for (i = m - 2; i >= 0; i--) {
781           v    = aa + 49 * (diag[i] + 1);
782           vi   = aj + diag[i] + 1;
783           nz   = ai[i + 1] - diag[i] - 1;
784           s[0] = xb[i2];
785           s[1] = xb[i2 + 1];
786           s[2] = xb[i2 + 2];
787           s[3] = xb[i2 + 3];
788           s[4] = xb[i2 + 4];
789           s[5] = xb[i2 + 5];
790           s[6] = xb[i2 + 6];
791           while (nz--) {
792             idx   = 7 * (*vi++);
793             xw[0] = x[idx];
794             xw[1] = x[1 + idx];
795             xw[2] = x[2 + idx];
796             xw[3] = x[3 + idx];
797             xw[4] = x[4 + idx];
798             xw[5] = x[5 + idx];
799             xw[6] = x[6 + idx];
800             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
801             v += 49;
802           }
803           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
804           x[i2]     = xw[0];
805           x[i2 + 1] = xw[1];
806           x[i2 + 2] = xw[2];
807           x[i2 + 3] = xw[3];
808           x[i2 + 4] = xw[4];
809           x[i2 + 5] = xw[5];
810           x[i2 + 6] = xw[6];
811           idiag -= 49;
812           i2 -= 7;
813         }
814         break;
815       default:
816         PetscCall(PetscArraycpy(w, xb + i2, bs));
817         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
818         i2 -= bs;
819         idiag -= bs2;
820         for (i = m - 2; i >= 0; i--) {
821           v  = aa + bs2 * (diag[i] + 1);
822           vi = aj + diag[i] + 1;
823           nz = ai[i + 1] - diag[i] - 1;
824 
825           PetscCall(PetscArraycpy(w, xb + i2, bs));
826           /* copy all rows of x that are needed into contiguous space */
827           workt = work;
828           for (j = 0; j < nz; j++) {
829             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
830             workt += bs;
831           }
832           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
833           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
834 
835           idiag -= bs2;
836           i2 -= bs;
837         }
838         break;
839       }
840       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
841     }
842     its--;
843   }
844   while (its--) {
845     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
846       idiag = a->idiag;
847       i2    = 0;
848       switch (bs) {
849       case 1:
850         for (i = 0; i < m; i++) {
851           v    = aa + ai[i];
852           vi   = aj + ai[i];
853           nz   = ai[i + 1] - ai[i];
854           s[0] = b[i2];
855           for (j = 0; j < nz; j++) {
856             xw[0] = x[vi[j]];
857             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
858           }
859           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
860           x[i2] += xw[0];
861           idiag += 1;
862           i2 += 1;
863         }
864         break;
865       case 2:
866         for (i = 0; i < m; i++) {
867           v    = aa + 4 * ai[i];
868           vi   = aj + ai[i];
869           nz   = ai[i + 1] - ai[i];
870           s[0] = b[i2];
871           s[1] = b[i2 + 1];
872           for (j = 0; j < nz; j++) {
873             idx   = 2 * vi[j];
874             it    = 4 * j;
875             xw[0] = x[idx];
876             xw[1] = x[1 + idx];
877             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
878           }
879           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
880           x[i2] += xw[0];
881           x[i2 + 1] += xw[1];
882           idiag += 4;
883           i2 += 2;
884         }
885         break;
886       case 3:
887         for (i = 0; i < m; i++) {
888           v    = aa + 9 * ai[i];
889           vi   = aj + ai[i];
890           nz   = ai[i + 1] - ai[i];
891           s[0] = b[i2];
892           s[1] = b[i2 + 1];
893           s[2] = b[i2 + 2];
894           while (nz--) {
895             idx   = 3 * (*vi++);
896             xw[0] = x[idx];
897             xw[1] = x[1 + idx];
898             xw[2] = x[2 + idx];
899             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
900             v += 9;
901           }
902           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
903           x[i2] += xw[0];
904           x[i2 + 1] += xw[1];
905           x[i2 + 2] += xw[2];
906           idiag += 9;
907           i2 += 3;
908         }
909         break;
910       case 4:
911         for (i = 0; i < m; i++) {
912           v    = aa + 16 * ai[i];
913           vi   = aj + ai[i];
914           nz   = ai[i + 1] - ai[i];
915           s[0] = b[i2];
916           s[1] = b[i2 + 1];
917           s[2] = b[i2 + 2];
918           s[3] = b[i2 + 3];
919           while (nz--) {
920             idx   = 4 * (*vi++);
921             xw[0] = x[idx];
922             xw[1] = x[1 + idx];
923             xw[2] = x[2 + idx];
924             xw[3] = x[3 + idx];
925             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
926             v += 16;
927           }
928           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
929           x[i2] += xw[0];
930           x[i2 + 1] += xw[1];
931           x[i2 + 2] += xw[2];
932           x[i2 + 3] += xw[3];
933           idiag += 16;
934           i2 += 4;
935         }
936         break;
937       case 5:
938         for (i = 0; i < m; i++) {
939           v    = aa + 25 * ai[i];
940           vi   = aj + ai[i];
941           nz   = ai[i + 1] - ai[i];
942           s[0] = b[i2];
943           s[1] = b[i2 + 1];
944           s[2] = b[i2 + 2];
945           s[3] = b[i2 + 3];
946           s[4] = b[i2 + 4];
947           while (nz--) {
948             idx   = 5 * (*vi++);
949             xw[0] = x[idx];
950             xw[1] = x[1 + idx];
951             xw[2] = x[2 + idx];
952             xw[3] = x[3 + idx];
953             xw[4] = x[4 + idx];
954             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
955             v += 25;
956           }
957           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
958           x[i2] += xw[0];
959           x[i2 + 1] += xw[1];
960           x[i2 + 2] += xw[2];
961           x[i2 + 3] += xw[3];
962           x[i2 + 4] += xw[4];
963           idiag += 25;
964           i2 += 5;
965         }
966         break;
967       case 6:
968         for (i = 0; i < m; i++) {
969           v    = aa + 36 * ai[i];
970           vi   = aj + ai[i];
971           nz   = ai[i + 1] - ai[i];
972           s[0] = b[i2];
973           s[1] = b[i2 + 1];
974           s[2] = b[i2 + 2];
975           s[3] = b[i2 + 3];
976           s[4] = b[i2 + 4];
977           s[5] = b[i2 + 5];
978           while (nz--) {
979             idx   = 6 * (*vi++);
980             xw[0] = x[idx];
981             xw[1] = x[1 + idx];
982             xw[2] = x[2 + idx];
983             xw[3] = x[3 + idx];
984             xw[4] = x[4 + idx];
985             xw[5] = x[5 + idx];
986             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
987             v += 36;
988           }
989           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
990           x[i2] += xw[0];
991           x[i2 + 1] += xw[1];
992           x[i2 + 2] += xw[2];
993           x[i2 + 3] += xw[3];
994           x[i2 + 4] += xw[4];
995           x[i2 + 5] += xw[5];
996           idiag += 36;
997           i2 += 6;
998         }
999         break;
1000       case 7:
1001         for (i = 0; i < m; i++) {
1002           v    = aa + 49 * ai[i];
1003           vi   = aj + ai[i];
1004           nz   = ai[i + 1] - ai[i];
1005           s[0] = b[i2];
1006           s[1] = b[i2 + 1];
1007           s[2] = b[i2 + 2];
1008           s[3] = b[i2 + 3];
1009           s[4] = b[i2 + 4];
1010           s[5] = b[i2 + 5];
1011           s[6] = b[i2 + 6];
1012           while (nz--) {
1013             idx   = 7 * (*vi++);
1014             xw[0] = x[idx];
1015             xw[1] = x[1 + idx];
1016             xw[2] = x[2 + idx];
1017             xw[3] = x[3 + idx];
1018             xw[4] = x[4 + idx];
1019             xw[5] = x[5 + idx];
1020             xw[6] = x[6 + idx];
1021             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1022             v += 49;
1023           }
1024           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1025           x[i2] += xw[0];
1026           x[i2 + 1] += xw[1];
1027           x[i2 + 2] += xw[2];
1028           x[i2 + 3] += xw[3];
1029           x[i2 + 4] += xw[4];
1030           x[i2 + 5] += xw[5];
1031           x[i2 + 6] += xw[6];
1032           idiag += 49;
1033           i2 += 7;
1034         }
1035         break;
1036       default:
1037         for (i = 0; i < m; i++) {
1038           v  = aa + bs2 * ai[i];
1039           vi = aj + ai[i];
1040           nz = ai[i + 1] - ai[i];
1041 
1042           PetscCall(PetscArraycpy(w, b + i2, bs));
1043           /* copy all rows of x that are needed into contiguous space */
1044           workt = work;
1045           for (j = 0; j < nz; j++) {
1046             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1047             workt += bs;
1048           }
1049           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1050           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1051 
1052           idiag += bs2;
1053           i2 += bs;
1054         }
1055         break;
1056       }
1057       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1058     }
1059     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1060       idiag = a->idiag + bs2 * (a->mbs - 1);
1061       i2    = bs * (m - 1);
1062       switch (bs) {
1063       case 1:
1064         for (i = m - 1; i >= 0; i--) {
1065           v    = aa + ai[i];
1066           vi   = aj + ai[i];
1067           nz   = ai[i + 1] - ai[i];
1068           s[0] = b[i2];
1069           for (j = 0; j < nz; j++) {
1070             xw[0] = x[vi[j]];
1071             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1072           }
1073           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1074           x[i2] += xw[0];
1075           idiag -= 1;
1076           i2 -= 1;
1077         }
1078         break;
1079       case 2:
1080         for (i = m - 1; i >= 0; i--) {
1081           v    = aa + 4 * ai[i];
1082           vi   = aj + ai[i];
1083           nz   = ai[i + 1] - ai[i];
1084           s[0] = b[i2];
1085           s[1] = b[i2 + 1];
1086           for (j = 0; j < nz; j++) {
1087             idx   = 2 * vi[j];
1088             it    = 4 * j;
1089             xw[0] = x[idx];
1090             xw[1] = x[1 + idx];
1091             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1092           }
1093           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
1094           x[i2] += xw[0];
1095           x[i2 + 1] += xw[1];
1096           idiag -= 4;
1097           i2 -= 2;
1098         }
1099         break;
1100       case 3:
1101         for (i = m - 1; i >= 0; i--) {
1102           v    = aa + 9 * ai[i];
1103           vi   = aj + ai[i];
1104           nz   = ai[i + 1] - ai[i];
1105           s[0] = b[i2];
1106           s[1] = b[i2 + 1];
1107           s[2] = b[i2 + 2];
1108           while (nz--) {
1109             idx   = 3 * (*vi++);
1110             xw[0] = x[idx];
1111             xw[1] = x[1 + idx];
1112             xw[2] = x[2 + idx];
1113             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1114             v += 9;
1115           }
1116           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
1117           x[i2] += xw[0];
1118           x[i2 + 1] += xw[1];
1119           x[i2 + 2] += xw[2];
1120           idiag -= 9;
1121           i2 -= 3;
1122         }
1123         break;
1124       case 4:
1125         for (i = m - 1; i >= 0; i--) {
1126           v    = aa + 16 * ai[i];
1127           vi   = aj + ai[i];
1128           nz   = ai[i + 1] - ai[i];
1129           s[0] = b[i2];
1130           s[1] = b[i2 + 1];
1131           s[2] = b[i2 + 2];
1132           s[3] = b[i2 + 3];
1133           while (nz--) {
1134             idx   = 4 * (*vi++);
1135             xw[0] = x[idx];
1136             xw[1] = x[1 + idx];
1137             xw[2] = x[2 + idx];
1138             xw[3] = x[3 + idx];
1139             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1140             v += 16;
1141           }
1142           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
1143           x[i2] += xw[0];
1144           x[i2 + 1] += xw[1];
1145           x[i2 + 2] += xw[2];
1146           x[i2 + 3] += xw[3];
1147           idiag -= 16;
1148           i2 -= 4;
1149         }
1150         break;
1151       case 5:
1152         for (i = m - 1; i >= 0; i--) {
1153           v    = aa + 25 * ai[i];
1154           vi   = aj + ai[i];
1155           nz   = ai[i + 1] - ai[i];
1156           s[0] = b[i2];
1157           s[1] = b[i2 + 1];
1158           s[2] = b[i2 + 2];
1159           s[3] = b[i2 + 3];
1160           s[4] = b[i2 + 4];
1161           while (nz--) {
1162             idx   = 5 * (*vi++);
1163             xw[0] = x[idx];
1164             xw[1] = x[1 + idx];
1165             xw[2] = x[2 + idx];
1166             xw[3] = x[3 + idx];
1167             xw[4] = x[4 + idx];
1168             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1169             v += 25;
1170           }
1171           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
1172           x[i2] += xw[0];
1173           x[i2 + 1] += xw[1];
1174           x[i2 + 2] += xw[2];
1175           x[i2 + 3] += xw[3];
1176           x[i2 + 4] += xw[4];
1177           idiag -= 25;
1178           i2 -= 5;
1179         }
1180         break;
1181       case 6:
1182         for (i = m - 1; i >= 0; i--) {
1183           v    = aa + 36 * ai[i];
1184           vi   = aj + ai[i];
1185           nz   = ai[i + 1] - ai[i];
1186           s[0] = b[i2];
1187           s[1] = b[i2 + 1];
1188           s[2] = b[i2 + 2];
1189           s[3] = b[i2 + 3];
1190           s[4] = b[i2 + 4];
1191           s[5] = b[i2 + 5];
1192           while (nz--) {
1193             idx   = 6 * (*vi++);
1194             xw[0] = x[idx];
1195             xw[1] = x[1 + idx];
1196             xw[2] = x[2 + idx];
1197             xw[3] = x[3 + idx];
1198             xw[4] = x[4 + idx];
1199             xw[5] = x[5 + idx];
1200             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1201             v += 36;
1202           }
1203           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
1204           x[i2] += xw[0];
1205           x[i2 + 1] += xw[1];
1206           x[i2 + 2] += xw[2];
1207           x[i2 + 3] += xw[3];
1208           x[i2 + 4] += xw[4];
1209           x[i2 + 5] += xw[5];
1210           idiag -= 36;
1211           i2 -= 6;
1212         }
1213         break;
1214       case 7:
1215         for (i = m - 1; i >= 0; i--) {
1216           v    = aa + 49 * ai[i];
1217           vi   = aj + ai[i];
1218           nz   = ai[i + 1] - ai[i];
1219           s[0] = b[i2];
1220           s[1] = b[i2 + 1];
1221           s[2] = b[i2 + 2];
1222           s[3] = b[i2 + 3];
1223           s[4] = b[i2 + 4];
1224           s[5] = b[i2 + 5];
1225           s[6] = b[i2 + 6];
1226           while (nz--) {
1227             idx   = 7 * (*vi++);
1228             xw[0] = x[idx];
1229             xw[1] = x[1 + idx];
1230             xw[2] = x[2 + idx];
1231             xw[3] = x[3 + idx];
1232             xw[4] = x[4 + idx];
1233             xw[5] = x[5 + idx];
1234             xw[6] = x[6 + idx];
1235             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1236             v += 49;
1237           }
1238           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1239           x[i2] += xw[0];
1240           x[i2 + 1] += xw[1];
1241           x[i2 + 2] += xw[2];
1242           x[i2 + 3] += xw[3];
1243           x[i2 + 4] += xw[4];
1244           x[i2 + 5] += xw[5];
1245           x[i2 + 6] += xw[6];
1246           idiag -= 49;
1247           i2 -= 7;
1248         }
1249         break;
1250       default:
1251         for (i = m - 1; i >= 0; i--) {
1252           v  = aa + bs2 * ai[i];
1253           vi = aj + ai[i];
1254           nz = ai[i + 1] - ai[i];
1255 
1256           PetscCall(PetscArraycpy(w, b + i2, bs));
1257           /* copy all rows of x that are needed into contiguous space */
1258           workt = work;
1259           for (j = 0; j < nz; j++) {
1260             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1261             workt += bs;
1262           }
1263           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1264           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1265 
1266           idiag -= bs2;
1267           i2 -= bs;
1268         }
1269         break;
1270       }
1271       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1272     }
1273   }
1274   PetscCall(VecRestoreArray(xx, &x));
1275   PetscCall(VecRestoreArrayRead(bb, &b));
1276   PetscFunctionReturn(PETSC_SUCCESS);
1277 }
1278 
1279 /*
1280     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1281 */
1282 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1283   #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1284 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1285   #define matsetvaluesblocked4_ matsetvaluesblocked4
1286 #endif
1287 
1288 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1289 {
1290   Mat                A = *AA;
1291   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1292   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1293   PetscInt          *ai = a->i, *ailen = a->ilen;
1294   PetscInt          *aj = a->j, stepval, lastcol = -1;
1295   const PetscScalar *value = v;
1296   MatScalar         *ap, *aa = a->a, *bap;
1297 
1298   PetscFunctionBegin;
1299   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1300   stepval = (n - 1) * 4;
1301   for (k = 0; k < m; k++) { /* loop over added rows */
1302     row  = im[k];
1303     rp   = aj + ai[row];
1304     ap   = aa + 16 * ai[row];
1305     nrow = ailen[row];
1306     low  = 0;
1307     high = nrow;
1308     for (l = 0; l < n; l++) { /* loop over added columns */
1309       col = in[l];
1310       if (col <= lastcol) low = 0;
1311       else high = nrow;
1312       lastcol = col;
1313       value   = v + k * (stepval + 4 + l) * 4;
1314       while (high - low > 7) {
1315         t = (low + high) / 2;
1316         if (rp[t] > col) high = t;
1317         else low = t;
1318       }
1319       for (i = low; i < high; i++) {
1320         if (rp[i] > col) break;
1321         if (rp[i] == col) {
1322           bap = ap + 16 * i;
1323           for (ii = 0; ii < 4; ii++, value += stepval) {
1324             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1325           }
1326           goto noinsert2;
1327         }
1328       }
1329       N = nrow++ - 1;
1330       high++; /* added new column index thus must search to one higher than before */
1331       /* shift up all the later entries in this row */
1332       for (ii = N; ii >= i; ii--) {
1333         rp[ii + 1] = rp[ii];
1334         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1335       }
1336       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1337       rp[i] = col;
1338       bap   = ap + 16 * i;
1339       for (ii = 0; ii < 4; ii++, value += stepval) {
1340         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1341       }
1342     noinsert2:;
1343       low = i;
1344     }
1345     ailen[row] = nrow;
1346   }
1347   PetscFunctionReturnVoid();
1348 }
1349 
1350 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1351   #define matsetvalues4_ MATSETVALUES4
1352 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1353   #define matsetvalues4_ matsetvalues4
1354 #endif
1355 
1356 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1357 {
1358   Mat          A = *AA;
1359   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1360   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1361   PetscInt    *ai = a->i, *ailen = a->ilen;
1362   PetscInt    *aj = a->j, brow, bcol;
1363   PetscInt     ridx, cidx, lastcol = -1;
1364   MatScalar   *ap, value, *aa      = a->a, *bap;
1365 
1366   PetscFunctionBegin;
1367   for (k = 0; k < m; k++) { /* loop over added rows */
1368     row  = im[k];
1369     brow = row / 4;
1370     rp   = aj + ai[brow];
1371     ap   = aa + 16 * ai[brow];
1372     nrow = ailen[brow];
1373     low  = 0;
1374     high = nrow;
1375     for (l = 0; l < n; l++) { /* loop over added columns */
1376       col   = in[l];
1377       bcol  = col / 4;
1378       ridx  = row % 4;
1379       cidx  = col % 4;
1380       value = v[l + k * n];
1381       if (col <= lastcol) low = 0;
1382       else high = nrow;
1383       lastcol = col;
1384       while (high - low > 7) {
1385         t = (low + high) / 2;
1386         if (rp[t] > bcol) high = t;
1387         else low = t;
1388       }
1389       for (i = low; i < high; i++) {
1390         if (rp[i] > bcol) break;
1391         if (rp[i] == bcol) {
1392           bap = ap + 16 * i + 4 * cidx + ridx;
1393           *bap += value;
1394           goto noinsert1;
1395         }
1396       }
1397       N = nrow++ - 1;
1398       high++; /* added new column thus must search to one higher than before */
1399       /* shift up all the later entries in this row */
1400       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
1401       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
1402       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1403       rp[i]                        = bcol;
1404       ap[16 * i + 4 * cidx + ridx] = value;
1405     noinsert1:;
1406       low = i;
1407     }
1408     ailen[brow] = nrow;
1409   }
1410   PetscFunctionReturnVoid();
1411 }
1412 
1413 /*
1414      Checks for missing diagonals
1415 */
1416 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1417 {
1418   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1419   PetscInt    *diag, *ii = a->i, i;
1420 
1421   PetscFunctionBegin;
1422   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
1423   *missing = PETSC_FALSE;
1424   if (A->rmap->n > 0 && !ii) {
1425     *missing = PETSC_TRUE;
1426     if (d) *d = 0;
1427     PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
1428   } else {
1429     PetscInt n;
1430     n    = PetscMin(a->mbs, a->nbs);
1431     diag = a->diag;
1432     for (i = 0; i < n; i++) {
1433       if (diag[i] >= ii[i + 1]) {
1434         *missing = PETSC_TRUE;
1435         if (d) *d = i;
1436         PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i));
1437         break;
1438       }
1439     }
1440   }
1441   PetscFunctionReturn(PETSC_SUCCESS);
1442 }
1443 
1444 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1445 {
1446   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1447   PetscInt     i, j, m = a->mbs;
1448 
1449   PetscFunctionBegin;
1450   if (!a->diag) {
1451     PetscCall(PetscMalloc1(m, &a->diag));
1452     a->free_diag = PETSC_TRUE;
1453   }
1454   for (i = 0; i < m; i++) {
1455     a->diag[i] = a->i[i + 1];
1456     for (j = a->i[i]; j < a->i[i + 1]; j++) {
1457       if (a->j[j] == i) {
1458         a->diag[i] = j;
1459         break;
1460       }
1461     }
1462   }
1463   PetscFunctionReturn(PETSC_SUCCESS);
1464 }
1465 
1466 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1467 {
1468   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1469   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
1470   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
1471 
1472   PetscFunctionBegin;
1473   *nn = n;
1474   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1475   if (symmetric) {
1476     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1477     nz = tia[n];
1478   } else {
1479     tia = a->i;
1480     tja = a->j;
1481   }
1482 
1483   if (!blockcompressed && bs > 1) {
1484     (*nn) *= bs;
1485     /* malloc & create the natural set of indices */
1486     PetscCall(PetscMalloc1((n + 1) * bs, ia));
1487     if (n) {
1488       (*ia)[0] = oshift;
1489       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
1490     }
1491 
1492     for (i = 1; i < n; i++) {
1493       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1494       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
1495     }
1496     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1497 
1498     if (inja) {
1499       PetscCall(PetscMalloc1(nz * bs * bs, ja));
1500       cnt = 0;
1501       for (i = 0; i < n; i++) {
1502         for (j = 0; j < bs; j++) {
1503           for (k = tia[i]; k < tia[i + 1]; k++) {
1504             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
1505           }
1506         }
1507       }
1508     }
1509 
1510     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1511       PetscCall(PetscFree(tia));
1512       PetscCall(PetscFree(tja));
1513     }
1514   } else if (oshift == 1) {
1515     if (symmetric) {
1516       nz = tia[A->rmap->n / bs];
1517       /*  add 1 to i and j indices */
1518       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1519       *ia = tia;
1520       if (ja) {
1521         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1522         *ja = tja;
1523       }
1524     } else {
1525       nz = a->i[A->rmap->n / bs];
1526       /* malloc space and  add 1 to i and j indices */
1527       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1528       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1529       if (ja) {
1530         PetscCall(PetscMalloc1(nz, ja));
1531         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1532       }
1533     }
1534   } else {
1535     *ia = tia;
1536     if (ja) *ja = tja;
1537   }
1538   PetscFunctionReturn(PETSC_SUCCESS);
1539 }
1540 
1541 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1542 {
1543   PetscFunctionBegin;
1544   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1545   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1546     PetscCall(PetscFree(*ia));
1547     if (ja) PetscCall(PetscFree(*ja));
1548   }
1549   PetscFunctionReturn(PETSC_SUCCESS);
1550 }
1551 
1552 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1553 {
1554   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1555 
1556   PetscFunctionBegin;
1557   if (A->hash_active) {
1558     PetscInt bs;
1559     A->ops[0] = a->cops;
1560     PetscCall(PetscHMapIJVDestroy(&a->ht));
1561     PetscCall(MatGetBlockSize(A, &bs));
1562     if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht));
1563     PetscCall(PetscFree(a->dnz));
1564     PetscCall(PetscFree(a->bdnz));
1565     A->hash_active = PETSC_FALSE;
1566   }
1567 #if defined(PETSC_USE_LOG)
1568   PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
1569 #endif
1570   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
1571   PetscCall(ISDestroy(&a->row));
1572   PetscCall(ISDestroy(&a->col));
1573   if (a->free_diag) PetscCall(PetscFree(a->diag));
1574   PetscCall(PetscFree(a->idiag));
1575   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
1576   PetscCall(PetscFree(a->solve_work));
1577   PetscCall(PetscFree(a->mult_work));
1578   PetscCall(PetscFree(a->sor_workt));
1579   PetscCall(PetscFree(a->sor_work));
1580   PetscCall(ISDestroy(&a->icol));
1581   PetscCall(PetscFree(a->saved_values));
1582   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1583 
1584   PetscCall(MatDestroy(&a->sbaijMat));
1585   PetscCall(MatDestroy(&a->parent));
1586   PetscCall(PetscFree(A->data));
1587 
1588   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
1589   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
1590   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
1591   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
1592   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
1593   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
1594   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
1595   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
1596   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
1597   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
1598   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
1599   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
1600 #if defined(PETSC_HAVE_HYPRE)
1601   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
1602 #endif
1603   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
1604   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1605   PetscFunctionReturn(PETSC_SUCCESS);
1606 }
1607 
1608 PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1609 {
1610   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1611 
1612   PetscFunctionBegin;
1613   switch (op) {
1614   case MAT_ROW_ORIENTED:
1615     a->roworiented = flg;
1616     break;
1617   case MAT_KEEP_NONZERO_PATTERN:
1618     a->keepnonzeropattern = flg;
1619     break;
1620   case MAT_NEW_NONZERO_LOCATIONS:
1621     a->nonew = (flg ? 0 : 1);
1622     break;
1623   case MAT_NEW_NONZERO_LOCATION_ERR:
1624     a->nonew = (flg ? -1 : 0);
1625     break;
1626   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1627     a->nonew = (flg ? -2 : 0);
1628     break;
1629   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1630     a->nounused = (flg ? -1 : 0);
1631     break;
1632   case MAT_FORCE_DIAGONAL_ENTRIES:
1633   case MAT_IGNORE_OFF_PROC_ENTRIES:
1634   case MAT_USE_HASH_TABLE:
1635   case MAT_SORTED_FULL:
1636     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1637     break;
1638   case MAT_SPD:
1639   case MAT_SYMMETRIC:
1640   case MAT_STRUCTURALLY_SYMMETRIC:
1641   case MAT_HERMITIAN:
1642   case MAT_SYMMETRY_ETERNAL:
1643   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1644   case MAT_SUBMAT_SINGLEIS:
1645   case MAT_STRUCTURE_ONLY:
1646   case MAT_SPD_ETERNAL:
1647     /* if the diagonal matrix is square it inherits some of the properties above */
1648     break;
1649   default:
1650     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1651   }
1652   PetscFunctionReturn(PETSC_SUCCESS);
1653 }
1654 
1655 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1656 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1657 {
1658   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
1659   MatScalar   *aa_i;
1660   PetscScalar *v_i;
1661 
1662   PetscFunctionBegin;
1663   bs  = A->rmap->bs;
1664   bs2 = bs * bs;
1665   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1666 
1667   bn  = row / bs; /* Block number */
1668   bp  = row % bs; /* Block Position */
1669   M   = ai[bn + 1] - ai[bn];
1670   *nz = bs * M;
1671 
1672   if (v) {
1673     *v = NULL;
1674     if (*nz) {
1675       PetscCall(PetscMalloc1(*nz, v));
1676       for (i = 0; i < M; i++) { /* for each block in the block row */
1677         v_i  = *v + i * bs;
1678         aa_i = aa + bs2 * (ai[bn] + i);
1679         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
1680       }
1681     }
1682   }
1683 
1684   if (idx) {
1685     *idx = NULL;
1686     if (*nz) {
1687       PetscCall(PetscMalloc1(*nz, idx));
1688       for (i = 0; i < M; i++) { /* for each block in the block row */
1689         idx_i = *idx + i * bs;
1690         itmp  = bs * aj[ai[bn] + i];
1691         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
1692       }
1693     }
1694   }
1695   PetscFunctionReturn(PETSC_SUCCESS);
1696 }
1697 
1698 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1699 {
1700   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1701 
1702   PetscFunctionBegin;
1703   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
1704   PetscFunctionReturn(PETSC_SUCCESS);
1705 }
1706 
1707 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1708 {
1709   PetscFunctionBegin;
1710   if (nz) *nz = 0;
1711   if (idx) PetscCall(PetscFree(*idx));
1712   if (v) PetscCall(PetscFree(*v));
1713   PetscFunctionReturn(PETSC_SUCCESS);
1714 }
1715 
1716 PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1717 {
1718   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
1719   Mat          C;
1720   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
1721   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
1722   MatScalar   *ata, *aa = a->a;
1723 
1724   PetscFunctionBegin;
1725   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1726   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1727   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1728     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1729 
1730     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
1731     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
1732     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
1733     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
1734 
1735     at  = (Mat_SeqBAIJ *)C->data;
1736     ati = at->i;
1737     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1738   } else {
1739     C   = *B;
1740     at  = (Mat_SeqBAIJ *)C->data;
1741     ati = at->i;
1742   }
1743 
1744   atj = at->j;
1745   ata = at->a;
1746 
1747   /* Copy ati into atfill so we have locations of the next free space in atj */
1748   PetscCall(PetscArraycpy(atfill, ati, nbs));
1749 
1750   /* Walk through A row-wise and mark nonzero entries of A^T. */
1751   for (i = 0; i < mbs; i++) {
1752     anzj = ai[i + 1] - ai[i];
1753     for (j = 0; j < anzj; j++) {
1754       atj[atfill[*aj]] = i;
1755       for (kr = 0; kr < bs; kr++) {
1756         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
1757       }
1758       atfill[*aj++] += 1;
1759     }
1760   }
1761   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1762   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1763 
1764   /* Clean up temporary space and complete requests. */
1765   PetscCall(PetscFree(atfill));
1766 
1767   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1768     PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1769     *B = C;
1770   } else {
1771     PetscCall(MatHeaderMerge(A, &C));
1772   }
1773   PetscFunctionReturn(PETSC_SUCCESS);
1774 }
1775 
1776 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1777 {
1778   Mat Btrans;
1779 
1780   PetscFunctionBegin;
1781   *f = PETSC_FALSE;
1782   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
1783   PetscCall(MatEqual_SeqBAIJ(B, Btrans, f));
1784   PetscCall(MatDestroy(&Btrans));
1785   PetscFunctionReturn(PETSC_SUCCESS);
1786 }
1787 
1788 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1789 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1790 {
1791   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1792   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1793   PetscInt    *rowlens, *colidxs;
1794   PetscScalar *matvals;
1795 
1796   PetscFunctionBegin;
1797   PetscCall(PetscViewerSetUp(viewer));
1798 
1799   M  = mat->rmap->N;
1800   N  = mat->cmap->N;
1801   m  = mat->rmap->n;
1802   bs = mat->rmap->bs;
1803   nz = bs * bs * A->nz;
1804 
1805   /* write matrix header */
1806   header[0] = MAT_FILE_CLASSID;
1807   header[1] = M;
1808   header[2] = N;
1809   header[3] = nz;
1810   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1811 
1812   /* store row lengths */
1813   PetscCall(PetscMalloc1(m, &rowlens));
1814   for (cnt = 0, i = 0; i < A->mbs; i++)
1815     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
1816   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
1817   PetscCall(PetscFree(rowlens));
1818 
1819   /* store column indices  */
1820   PetscCall(PetscMalloc1(nz, &colidxs));
1821   for (cnt = 0, i = 0; i < A->mbs; i++)
1822     for (k = 0; k < bs; k++)
1823       for (j = A->i[i]; j < A->i[i + 1]; j++)
1824         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
1825   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1826   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
1827   PetscCall(PetscFree(colidxs));
1828 
1829   /* store nonzero values */
1830   PetscCall(PetscMalloc1(nz, &matvals));
1831   for (cnt = 0, i = 0; i < A->mbs; i++)
1832     for (k = 0; k < bs; k++)
1833       for (j = A->i[i]; j < A->i[i + 1]; j++)
1834         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
1835   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1836   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
1837   PetscCall(PetscFree(matvals));
1838 
1839   /* write block size option to the viewer's .info file */
1840   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1841   PetscFunctionReturn(PETSC_SUCCESS);
1842 }
1843 
1844 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1845 {
1846   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1847   PetscInt     i, bs = A->rmap->bs, k;
1848 
1849   PetscFunctionBegin;
1850   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1851   for (i = 0; i < a->mbs; i++) {
1852     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
1853     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
1854     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1855   }
1856   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1857   PetscFunctionReturn(PETSC_SUCCESS);
1858 }
1859 
1860 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1861 {
1862   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1863   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1864   PetscViewerFormat format;
1865 
1866   PetscFunctionBegin;
1867   if (A->structure_only) {
1868     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
1869     PetscFunctionReturn(PETSC_SUCCESS);
1870   }
1871 
1872   PetscCall(PetscViewerGetFormat(viewer, &format));
1873   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1874     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1875   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1876     const char *matname;
1877     Mat         aij;
1878     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
1879     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
1880     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
1881     PetscCall(MatView(aij, viewer));
1882     PetscCall(MatDestroy(&aij));
1883   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1884     PetscFunctionReturn(PETSC_SUCCESS);
1885   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1886     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1887     for (i = 0; i < a->mbs; i++) {
1888       for (j = 0; j < bs; j++) {
1889         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1890         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1891           for (l = 0; l < bs; l++) {
1892 #if defined(PETSC_USE_COMPLEX)
1893             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1894               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1895             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1896               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1897             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1898               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1899             }
1900 #else
1901             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1902 #endif
1903           }
1904         }
1905         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1906       }
1907     }
1908     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1909   } else {
1910     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1911     for (i = 0; i < a->mbs; i++) {
1912       for (j = 0; j < bs; j++) {
1913         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1914         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1915           for (l = 0; l < bs; l++) {
1916 #if defined(PETSC_USE_COMPLEX)
1917             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
1918               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1919             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
1920               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1921             } else {
1922               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1923             }
1924 #else
1925             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1926 #endif
1927           }
1928         }
1929         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1930       }
1931     }
1932     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1933   }
1934   PetscCall(PetscViewerFlush(viewer));
1935   PetscFunctionReturn(PETSC_SUCCESS);
1936 }
1937 
1938 #include <petscdraw.h>
1939 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1940 {
1941   Mat               A = (Mat)Aa;
1942   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1943   PetscInt          row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2;
1944   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1945   MatScalar        *aa;
1946   PetscViewer       viewer;
1947   PetscViewerFormat format;
1948 
1949   PetscFunctionBegin;
1950   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1951   PetscCall(PetscViewerGetFormat(viewer, &format));
1952   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
1953 
1954   /* loop over matrix elements drawing boxes */
1955 
1956   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1957     PetscDrawCollectiveBegin(draw);
1958     /* Blue for negative, Cyan for zero and  Red for positive */
1959     color = PETSC_DRAW_BLUE;
1960     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1961       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1962         y_l = A->rmap->N - row - 1.0;
1963         y_r = y_l + 1.0;
1964         x_l = a->j[j] * bs;
1965         x_r = x_l + 1.0;
1966         aa  = a->a + j * bs2;
1967         for (k = 0; k < bs; k++) {
1968           for (l = 0; l < bs; l++) {
1969             if (PetscRealPart(*aa++) >= 0.) continue;
1970             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1971           }
1972         }
1973       }
1974     }
1975     color = PETSC_DRAW_CYAN;
1976     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1977       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1978         y_l = A->rmap->N - row - 1.0;
1979         y_r = y_l + 1.0;
1980         x_l = a->j[j] * bs;
1981         x_r = x_l + 1.0;
1982         aa  = a->a + j * bs2;
1983         for (k = 0; k < bs; k++) {
1984           for (l = 0; l < bs; l++) {
1985             if (PetscRealPart(*aa++) != 0.) continue;
1986             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1987           }
1988         }
1989       }
1990     }
1991     color = PETSC_DRAW_RED;
1992     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1993       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1994         y_l = A->rmap->N - row - 1.0;
1995         y_r = y_l + 1.0;
1996         x_l = a->j[j] * bs;
1997         x_r = x_l + 1.0;
1998         aa  = a->a + j * bs2;
1999         for (k = 0; k < bs; k++) {
2000           for (l = 0; l < bs; l++) {
2001             if (PetscRealPart(*aa++) <= 0.) continue;
2002             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2003           }
2004         }
2005       }
2006     }
2007     PetscDrawCollectiveEnd(draw);
2008   } else {
2009     /* use contour shading to indicate magnitude of values */
2010     /* first determine max of all nonzero values */
2011     PetscReal minv = 0.0, maxv = 0.0;
2012     PetscDraw popup;
2013 
2014     for (i = 0; i < a->nz * a->bs2; i++) {
2015       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
2016     }
2017     if (minv >= maxv) maxv = minv + PETSC_SMALL;
2018     PetscCall(PetscDrawGetPopup(draw, &popup));
2019     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
2020 
2021     PetscDrawCollectiveBegin(draw);
2022     for (i = 0, row = 0; i < mbs; i++, row += bs) {
2023       for (j = a->i[i]; j < a->i[i + 1]; j++) {
2024         y_l = A->rmap->N - row - 1.0;
2025         y_r = y_l + 1.0;
2026         x_l = a->j[j] * bs;
2027         x_r = x_l + 1.0;
2028         aa  = a->a + j * bs2;
2029         for (k = 0; k < bs; k++) {
2030           for (l = 0; l < bs; l++) {
2031             MatScalar v = *aa++;
2032             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
2033             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2034           }
2035         }
2036       }
2037     }
2038     PetscDrawCollectiveEnd(draw);
2039   }
2040   PetscFunctionReturn(PETSC_SUCCESS);
2041 }
2042 
2043 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2044 {
2045   PetscReal xl, yl, xr, yr, w, h;
2046   PetscDraw draw;
2047   PetscBool isnull;
2048 
2049   PetscFunctionBegin;
2050   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
2051   PetscCall(PetscDrawIsNull(draw, &isnull));
2052   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
2053 
2054   xr = A->cmap->n;
2055   yr = A->rmap->N;
2056   h  = yr / 10.0;
2057   w  = xr / 10.0;
2058   xr += w;
2059   yr += h;
2060   xl = -w;
2061   yl = -h;
2062   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
2063   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
2064   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
2065   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
2066   PetscCall(PetscDrawSave(draw));
2067   PetscFunctionReturn(PETSC_SUCCESS);
2068 }
2069 
2070 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2071 {
2072   PetscBool iascii, isbinary, isdraw;
2073 
2074   PetscFunctionBegin;
2075   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2076   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2077   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
2078   if (iascii) {
2079     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
2080   } else if (isbinary) {
2081     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
2082   } else if (isdraw) {
2083     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
2084   } else {
2085     Mat B;
2086     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2087     PetscCall(MatView(B, viewer));
2088     PetscCall(MatDestroy(&B));
2089   }
2090   PetscFunctionReturn(PETSC_SUCCESS);
2091 }
2092 
2093 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2094 {
2095   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2096   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2097   PetscInt    *ai = a->i, *ailen = a->ilen;
2098   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
2099   MatScalar   *ap, *aa = a->a;
2100 
2101   PetscFunctionBegin;
2102   for (k = 0; k < m; k++) { /* loop over rows */
2103     row  = im[k];
2104     brow = row / bs;
2105     if (row < 0) {
2106       v += n;
2107       continue;
2108     } /* negative row */
2109     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2110     rp   = aj ? aj + ai[brow] : NULL;       /* mustn't add to NULL, that is UB */
2111     ap   = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */
2112     nrow = ailen[brow];
2113     for (l = 0; l < n; l++) { /* loop over columns */
2114       if (in[l] < 0) {
2115         v++;
2116         continue;
2117       } /* negative column */
2118       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
2119       col  = in[l];
2120       bcol = col / bs;
2121       cidx = col % bs;
2122       ridx = row % bs;
2123       high = nrow;
2124       low  = 0; /* assume unsorted */
2125       while (high - low > 5) {
2126         t = (low + high) / 2;
2127         if (rp[t] > bcol) high = t;
2128         else low = t;
2129       }
2130       for (i = low; i < high; i++) {
2131         if (rp[i] > bcol) break;
2132         if (rp[i] == bcol) {
2133           *v++ = ap[bs2 * i + bs * cidx + ridx];
2134           goto finished;
2135         }
2136       }
2137       *v++ = 0.0;
2138     finished:;
2139     }
2140   }
2141   PetscFunctionReturn(PETSC_SUCCESS);
2142 }
2143 
2144 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2145 {
2146   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2147   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2148   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2149   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2150   PetscBool          roworiented = a->roworiented;
2151   const PetscScalar *value       = v;
2152   MatScalar         *ap = NULL, *aa = a->a, *bap;
2153 
2154   PetscFunctionBegin;
2155   if (roworiented) {
2156     stepval = (n - 1) * bs;
2157   } else {
2158     stepval = (m - 1) * bs;
2159   }
2160   for (k = 0; k < m; k++) { /* loop over added rows */
2161     row = im[k];
2162     if (row < 0) continue;
2163     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
2164     rp = aj + ai[row];
2165     if (!A->structure_only) ap = aa + bs2 * ai[row];
2166     rmax = imax[row];
2167     nrow = ailen[row];
2168     low  = 0;
2169     high = nrow;
2170     for (l = 0; l < n; l++) { /* loop over added columns */
2171       if (in[l] < 0) continue;
2172       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
2173       col = in[l];
2174       if (!A->structure_only) {
2175         if (roworiented) {
2176           value = v + (k * (stepval + bs) + l) * bs;
2177         } else {
2178           value = v + (l * (stepval + bs) + k) * bs;
2179         }
2180       }
2181       if (col <= lastcol) low = 0;
2182       else high = nrow;
2183       lastcol = col;
2184       while (high - low > 7) {
2185         t = (low + high) / 2;
2186         if (rp[t] > col) high = t;
2187         else low = t;
2188       }
2189       for (i = low; i < high; i++) {
2190         if (rp[i] > col) break;
2191         if (rp[i] == col) {
2192           if (A->structure_only) goto noinsert2;
2193           bap = ap + bs2 * i;
2194           if (roworiented) {
2195             if (is == ADD_VALUES) {
2196               for (ii = 0; ii < bs; ii++, value += stepval) {
2197                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2198               }
2199             } else {
2200               for (ii = 0; ii < bs; ii++, value += stepval) {
2201                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2202               }
2203             }
2204           } else {
2205             if (is == ADD_VALUES) {
2206               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2207                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
2208                 bap += bs;
2209               }
2210             } else {
2211               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2212                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
2213                 bap += bs;
2214               }
2215             }
2216           }
2217           goto noinsert2;
2218         }
2219       }
2220       if (nonew == 1) goto noinsert2;
2221       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2222       if (A->structure_only) {
2223         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
2224       } else {
2225         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2226       }
2227       N = nrow++ - 1;
2228       high++;
2229       /* shift up all the later entries in this row */
2230       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2231       rp[i] = col;
2232       if (!A->structure_only) {
2233         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2234         bap = ap + bs2 * i;
2235         if (roworiented) {
2236           for (ii = 0; ii < bs; ii++, value += stepval) {
2237             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2238           }
2239         } else {
2240           for (ii = 0; ii < bs; ii++, value += stepval) {
2241             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2242           }
2243         }
2244       }
2245     noinsert2:;
2246       low = i;
2247     }
2248     ailen[row] = nrow;
2249   }
2250   PetscFunctionReturn(PETSC_SUCCESS);
2251 }
2252 
2253 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2254 {
2255   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2256   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2257   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2258   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2259   MatScalar   *aa    = a->a, *ap;
2260   PetscReal    ratio = 0.6;
2261 
2262   PetscFunctionBegin;
2263   if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS);
2264 
2265   if (m) rmax = ailen[0];
2266   for (i = 1; i < mbs; i++) {
2267     /* move each row back by the amount of empty slots (fshift) before it*/
2268     fshift += imax[i - 1] - ailen[i - 1];
2269     rmax = PetscMax(rmax, ailen[i]);
2270     if (fshift) {
2271       ip = aj + ai[i];
2272       ap = aa + bs2 * ai[i];
2273       N  = ailen[i];
2274       PetscCall(PetscArraymove(ip - fshift, ip, N));
2275       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2276     }
2277     ai[i] = ai[i - 1] + ailen[i - 1];
2278   }
2279   if (mbs) {
2280     fshift += imax[mbs - 1] - ailen[mbs - 1];
2281     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2282   }
2283 
2284   /* reset ilen and imax for each row */
2285   a->nonzerorowcnt = 0;
2286   if (A->structure_only) {
2287     PetscCall(PetscFree2(a->imax, a->ilen));
2288   } else { /* !A->structure_only */
2289     for (i = 0; i < mbs; i++) {
2290       ailen[i] = imax[i] = ai[i + 1] - ai[i];
2291       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2292     }
2293   }
2294   a->nz = ai[mbs];
2295 
2296   /* diagonals may have moved, so kill the diagonal pointers */
2297   a->idiagvalid = PETSC_FALSE;
2298   if (fshift && a->diag) PetscCall(PetscFree(a->diag));
2299   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
2300   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
2301   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
2302   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
2303 
2304   A->info.mallocs += a->reallocs;
2305   a->reallocs         = 0;
2306   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2307   a->rmax             = rmax;
2308 
2309   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
2310   PetscFunctionReturn(PETSC_SUCCESS);
2311 }
2312 
2313 /*
2314    This function returns an array of flags which indicate the locations of contiguous
2315    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2316    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2317    Assume: sizes should be long enough to hold all the values.
2318 */
2319 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2320 {
2321   PetscInt j = 0;
2322 
2323   PetscFunctionBegin;
2324   for (PetscInt i = 0; i < n; j++) {
2325     PetscInt row = idx[i];
2326     if (row % bs != 0) { /* Not the beginning of a block */
2327       sizes[j] = 1;
2328       i++;
2329     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2330       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2331       i++;
2332     } else { /* Beginning of the block, so check if the complete block exists */
2333       PetscBool flg = PETSC_TRUE;
2334       for (PetscInt k = 1; k < bs; k++) {
2335         if (row + k != idx[i + k]) { /* break in the block */
2336           flg = PETSC_FALSE;
2337           break;
2338         }
2339       }
2340       if (flg) { /* No break in the bs */
2341         sizes[j] = bs;
2342         i += bs;
2343       } else {
2344         sizes[j] = 1;
2345         i++;
2346       }
2347     }
2348   }
2349   *bs_max = j;
2350   PetscFunctionReturn(PETSC_SUCCESS);
2351 }
2352 
2353 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2354 {
2355   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2356   PetscInt           i, j, k, count, *rows;
2357   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
2358   PetscScalar        zero = 0.0;
2359   MatScalar         *aa;
2360   const PetscScalar *xx;
2361   PetscScalar       *bb;
2362 
2363   PetscFunctionBegin;
2364   /* fix right hand side if needed */
2365   if (x && b) {
2366     PetscCall(VecGetArrayRead(x, &xx));
2367     PetscCall(VecGetArray(b, &bb));
2368     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
2369     PetscCall(VecRestoreArrayRead(x, &xx));
2370     PetscCall(VecRestoreArray(b, &bb));
2371   }
2372 
2373   /* Make a copy of the IS and  sort it */
2374   /* allocate memory for rows,sizes */
2375   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2376 
2377   /* copy IS values to rows, and sort them */
2378   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
2379   PetscCall(PetscSortInt(is_n, rows));
2380 
2381   if (baij->keepnonzeropattern) {
2382     for (i = 0; i < is_n; i++) sizes[i] = 1;
2383     bs_max = is_n;
2384   } else {
2385     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2386     A->nonzerostate++;
2387   }
2388 
2389   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2390     row = rows[j];
2391     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2392     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2393     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
2394     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2395       if (diag != (PetscScalar)0.0) {
2396         if (baij->ilen[row / bs] > 0) {
2397           baij->ilen[row / bs]       = 1;
2398           baij->j[baij->i[row / bs]] = row / bs;
2399 
2400           PetscCall(PetscArrayzero(aa, count * bs));
2401         }
2402         /* Now insert all the diagonal values for this bs */
2403         for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES));
2404       } else { /* (diag == 0.0) */
2405         baij->ilen[row / bs] = 0;
2406       }      /* end (diag == 0.0) */
2407     } else { /* (sizes[i] != bs) */
2408       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2409       for (k = 0; k < count; k++) {
2410         aa[0] = zero;
2411         aa += bs;
2412       }
2413       if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES));
2414     }
2415   }
2416 
2417   PetscCall(PetscFree2(rows, sizes));
2418   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2419   PetscFunctionReturn(PETSC_SUCCESS);
2420 }
2421 
2422 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2423 {
2424   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2425   PetscInt           i, j, k, count;
2426   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
2427   PetscScalar        zero = 0.0;
2428   MatScalar         *aa;
2429   const PetscScalar *xx;
2430   PetscScalar       *bb;
2431   PetscBool         *zeroed, vecs = PETSC_FALSE;
2432 
2433   PetscFunctionBegin;
2434   /* fix right hand side if needed */
2435   if (x && b) {
2436     PetscCall(VecGetArrayRead(x, &xx));
2437     PetscCall(VecGetArray(b, &bb));
2438     vecs = PETSC_TRUE;
2439   }
2440 
2441   /* zero the columns */
2442   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
2443   for (i = 0; i < is_n; i++) {
2444     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
2445     zeroed[is_idx[i]] = PETSC_TRUE;
2446   }
2447   for (i = 0; i < A->rmap->N; i++) {
2448     if (!zeroed[i]) {
2449       row = i / bs;
2450       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
2451         for (k = 0; k < bs; k++) {
2452           col = bs * baij->j[j] + k;
2453           if (zeroed[col]) {
2454             aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
2455             if (vecs) bb[i] -= aa[0] * xx[col];
2456             aa[0] = 0.0;
2457           }
2458         }
2459       }
2460     } else if (vecs) bb[i] = diag * xx[i];
2461   }
2462   PetscCall(PetscFree(zeroed));
2463   if (vecs) {
2464     PetscCall(VecRestoreArrayRead(x, &xx));
2465     PetscCall(VecRestoreArray(b, &bb));
2466   }
2467 
2468   /* zero the rows */
2469   for (i = 0; i < is_n; i++) {
2470     row   = is_idx[i];
2471     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2472     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
2473     for (k = 0; k < count; k++) {
2474       aa[0] = zero;
2475       aa += bs;
2476     }
2477     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
2478   }
2479   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2480   PetscFunctionReturn(PETSC_SUCCESS);
2481 }
2482 
2483 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2484 {
2485   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2486   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2487   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2488   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2489   PetscInt     ridx, cidx, bs2                 = a->bs2;
2490   PetscBool    roworiented = a->roworiented;
2491   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
2492 
2493   PetscFunctionBegin;
2494   for (k = 0; k < m; k++) { /* loop over added rows */
2495     row  = im[k];
2496     brow = row / bs;
2497     if (row < 0) continue;
2498     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
2499     rp = aj + ai[brow];
2500     if (!A->structure_only) ap = aa + bs2 * ai[brow];
2501     rmax = imax[brow];
2502     nrow = ailen[brow];
2503     low  = 0;
2504     high = nrow;
2505     for (l = 0; l < n; l++) { /* loop over added columns */
2506       if (in[l] < 0) continue;
2507       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
2508       col  = in[l];
2509       bcol = col / bs;
2510       ridx = row % bs;
2511       cidx = col % bs;
2512       if (!A->structure_only) {
2513         if (roworiented) {
2514           value = v[l + k * n];
2515         } else {
2516           value = v[k + l * m];
2517         }
2518       }
2519       if (col <= lastcol) low = 0;
2520       else high = nrow;
2521       lastcol = col;
2522       while (high - low > 7) {
2523         t = (low + high) / 2;
2524         if (rp[t] > bcol) high = t;
2525         else low = t;
2526       }
2527       for (i = low; i < high; i++) {
2528         if (rp[i] > bcol) break;
2529         if (rp[i] == bcol) {
2530           bap = ap + bs2 * i + bs * cidx + ridx;
2531           if (!A->structure_only) {
2532             if (is == ADD_VALUES) *bap += value;
2533             else *bap = value;
2534           }
2535           goto noinsert1;
2536         }
2537       }
2538       if (nonew == 1) goto noinsert1;
2539       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2540       if (A->structure_only) {
2541         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2542       } else {
2543         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2544       }
2545       N = nrow++ - 1;
2546       high++;
2547       /* shift up all the later entries in this row */
2548       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2549       rp[i] = bcol;
2550       if (!A->structure_only) {
2551         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2552         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2553         ap[bs2 * i + bs * cidx + ridx] = value;
2554       }
2555       a->nz++;
2556       A->nonzerostate++;
2557     noinsert1:;
2558       low = i;
2559     }
2560     ailen[brow] = nrow;
2561   }
2562   PetscFunctionReturn(PETSC_SUCCESS);
2563 }
2564 
2565 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2566 {
2567   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
2568   Mat          outA;
2569   PetscBool    row_identity, col_identity;
2570 
2571   PetscFunctionBegin;
2572   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
2573   PetscCall(ISIdentity(row, &row_identity));
2574   PetscCall(ISIdentity(col, &col_identity));
2575   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
2576 
2577   outA            = inA;
2578   inA->factortype = MAT_FACTOR_LU;
2579   PetscCall(PetscFree(inA->solvertype));
2580   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
2581 
2582   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2583 
2584   PetscCall(PetscObjectReference((PetscObject)row));
2585   PetscCall(ISDestroy(&a->row));
2586   a->row = row;
2587   PetscCall(PetscObjectReference((PetscObject)col));
2588   PetscCall(ISDestroy(&a->col));
2589   a->col = col;
2590 
2591   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2592   PetscCall(ISDestroy(&a->icol));
2593   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2594 
2595   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2596   if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
2597   PetscCall(MatLUFactorNumeric(outA, inA, info));
2598   PetscFunctionReturn(PETSC_SUCCESS);
2599 }
2600 
2601 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2602 {
2603   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2604 
2605   PetscFunctionBegin;
2606   baij->nz = baij->maxnz;
2607   PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2608   PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
2609   PetscFunctionReturn(PETSC_SUCCESS);
2610 }
2611 
2612 /*@
2613   MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix.
2614 
2615   Input Parameters:
2616 + mat     - the `MATSEQBAIJ` matrix
2617 - indices - the column indices
2618 
2619   Level: advanced
2620 
2621   Notes:
2622   This can be called if you have precomputed the nonzero structure of the
2623   matrix and want to provide it to the matrix object to improve the performance
2624   of the `MatSetValues()` operation.
2625 
2626   You MUST have set the correct numbers of nonzeros per row in the call to
2627   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
2628 
2629   MUST be called before any calls to `MatSetValues()`
2630 
2631 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()`
2632 @*/
2633 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2634 {
2635   PetscFunctionBegin;
2636   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2637   PetscAssertPointer(indices, 2);
2638   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices));
2639   PetscFunctionReturn(PETSC_SUCCESS);
2640 }
2641 
2642 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2643 {
2644   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2645   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2646   PetscReal    atmp;
2647   PetscScalar *x, zero = 0.0;
2648   MatScalar   *aa;
2649   PetscInt     ncols, brow, krow, kcol;
2650 
2651   PetscFunctionBegin;
2652   /* why is this not a macro???????????????????????????????????????????????????????????????? */
2653   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2654   bs  = A->rmap->bs;
2655   aa  = a->a;
2656   ai  = a->i;
2657   aj  = a->j;
2658   mbs = a->mbs;
2659 
2660   PetscCall(VecSet(v, zero));
2661   PetscCall(VecGetArray(v, &x));
2662   PetscCall(VecGetLocalSize(v, &n));
2663   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2664   for (i = 0; i < mbs; i++) {
2665     ncols = ai[1] - ai[0];
2666     ai++;
2667     brow = bs * i;
2668     for (j = 0; j < ncols; j++) {
2669       for (kcol = 0; kcol < bs; kcol++) {
2670         for (krow = 0; krow < bs; krow++) {
2671           atmp = PetscAbsScalar(*aa);
2672           aa++;
2673           row = brow + krow; /* row index */
2674           if (PetscAbsScalar(x[row]) < atmp) {
2675             x[row] = atmp;
2676             if (idx) idx[row] = bs * (*aj) + kcol;
2677           }
2678         }
2679       }
2680       aj++;
2681     }
2682   }
2683   PetscCall(VecRestoreArray(v, &x));
2684   PetscFunctionReturn(PETSC_SUCCESS);
2685 }
2686 
2687 PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2688 {
2689   PetscFunctionBegin;
2690   /* If the two matrices have the same copy implementation, use fast copy. */
2691   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2692     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
2693     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2694     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
2695 
2696     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
2697     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
2698     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
2699     PetscCall(PetscObjectStateIncrease((PetscObject)B));
2700   } else {
2701     PetscCall(MatCopy_Basic(A, B, str));
2702   }
2703   PetscFunctionReturn(PETSC_SUCCESS);
2704 }
2705 
2706 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2707 {
2708   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2709 
2710   PetscFunctionBegin;
2711   *array = a->a;
2712   PetscFunctionReturn(PETSC_SUCCESS);
2713 }
2714 
2715 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2716 {
2717   PetscFunctionBegin;
2718   *array = NULL;
2719   PetscFunctionReturn(PETSC_SUCCESS);
2720 }
2721 
2722 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2723 {
2724   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
2725   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
2726   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
2727 
2728   PetscFunctionBegin;
2729   /* Set the number of nonzeros in the new matrix */
2730   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
2731   PetscFunctionReturn(PETSC_SUCCESS);
2732 }
2733 
2734 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2735 {
2736   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
2737   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2738   PetscBLASInt one = 1;
2739 
2740   PetscFunctionBegin;
2741   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2742     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2743     if (e) {
2744       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2745       if (e) {
2746         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2747         if (e) str = SAME_NONZERO_PATTERN;
2748       }
2749     }
2750     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2751   }
2752   if (str == SAME_NONZERO_PATTERN) {
2753     PetscScalar  alpha = a;
2754     PetscBLASInt bnz;
2755     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2756     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
2757     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2758   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2759     PetscCall(MatAXPY_Basic(Y, a, X, str));
2760   } else {
2761     Mat       B;
2762     PetscInt *nnz;
2763     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
2764     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
2765     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2766     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2767     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
2768     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
2769     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
2770     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
2771     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
2772     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2773     PetscCall(MatHeaderMerge(Y, &B));
2774     PetscCall(PetscFree(nnz));
2775   }
2776   PetscFunctionReturn(PETSC_SUCCESS);
2777 }
2778 
2779 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2780 {
2781 #if PetscDefined(USE_COMPLEX)
2782   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2783   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2784   MatScalar   *aa = a->a;
2785 
2786   PetscFunctionBegin;
2787   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
2788   PetscFunctionReturn(PETSC_SUCCESS);
2789 #else
2790   (void)A;
2791   return PETSC_SUCCESS;
2792 #endif
2793 }
2794 
2795 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2796 {
2797 #if PetscDefined(USE_COMPLEX)
2798   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2799   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2800   MatScalar   *aa = a->a;
2801 
2802   PetscFunctionBegin;
2803   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
2804   PetscFunctionReturn(PETSC_SUCCESS);
2805 #else
2806   (void)A;
2807   return PETSC_SUCCESS;
2808 #endif
2809 }
2810 
2811 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2812 {
2813 #if PetscDefined(USE_COMPLEX)
2814   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2815   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2816   MatScalar   *aa = a->a;
2817 
2818   PetscFunctionBegin;
2819   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2820   PetscFunctionReturn(PETSC_SUCCESS);
2821 #else
2822   (void)A;
2823   return PETSC_SUCCESS;
2824 #endif
2825 }
2826 
2827 /*
2828     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2829 */
2830 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2831 {
2832   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
2833   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
2834   PetscInt     nz = a->i[m], row, *jj, mr, col;
2835 
2836   PetscFunctionBegin;
2837   *nn = n;
2838   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2839   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
2840   PetscCall(PetscCalloc1(n, &collengths));
2841   PetscCall(PetscMalloc1(n + 1, &cia));
2842   PetscCall(PetscMalloc1(nz, &cja));
2843   jj = a->j;
2844   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2845   cia[0] = oshift;
2846   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2847   PetscCall(PetscArrayzero(collengths, n));
2848   jj = a->j;
2849   for (row = 0; row < m; row++) {
2850     mr = a->i[row + 1] - a->i[row];
2851     for (i = 0; i < mr; i++) {
2852       col = *jj++;
2853 
2854       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2855     }
2856   }
2857   PetscCall(PetscFree(collengths));
2858   *ia = cia;
2859   *ja = cja;
2860   PetscFunctionReturn(PETSC_SUCCESS);
2861 }
2862 
2863 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2864 {
2865   PetscFunctionBegin;
2866   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2867   PetscCall(PetscFree(*ia));
2868   PetscCall(PetscFree(*ja));
2869   PetscFunctionReturn(PETSC_SUCCESS);
2870 }
2871 
2872 /*
2873  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2874  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2875  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2876  */
2877 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2878 {
2879   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2880   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2881   PetscInt     nz = a->i[m], row, *jj, mr, col;
2882   PetscInt    *cspidx;
2883 
2884   PetscFunctionBegin;
2885   *nn = n;
2886   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2887 
2888   PetscCall(PetscCalloc1(n, &collengths));
2889   PetscCall(PetscMalloc1(n + 1, &cia));
2890   PetscCall(PetscMalloc1(nz, &cja));
2891   PetscCall(PetscMalloc1(nz, &cspidx));
2892   jj = a->j;
2893   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2894   cia[0] = oshift;
2895   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2896   PetscCall(PetscArrayzero(collengths, n));
2897   jj = a->j;
2898   for (row = 0; row < m; row++) {
2899     mr = a->i[row + 1] - a->i[row];
2900     for (i = 0; i < mr; i++) {
2901       col                                         = *jj++;
2902       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2903       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2904     }
2905   }
2906   PetscCall(PetscFree(collengths));
2907   *ia    = cia;
2908   *ja    = cja;
2909   *spidx = cspidx;
2910   PetscFunctionReturn(PETSC_SUCCESS);
2911 }
2912 
2913 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2914 {
2915   PetscFunctionBegin;
2916   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
2917   PetscCall(PetscFree(*spidx));
2918   PetscFunctionReturn(PETSC_SUCCESS);
2919 }
2920 
2921 PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2922 {
2923   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
2924 
2925   PetscFunctionBegin;
2926   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
2927   PetscCall(MatShift_Basic(Y, a));
2928   PetscFunctionReturn(PETSC_SUCCESS);
2929 }
2930 
2931 PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep)
2932 {
2933   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2934   PetscInt     fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k;
2935   PetscInt     m = A->rmap->N, *ailen = a->ilen;
2936   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2937   MatScalar   *aa = a->a, *ap;
2938   PetscBool    zero;
2939 
2940   PetscFunctionBegin;
2941   PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix");
2942   if (m) rmax = ailen[0];
2943   for (i = 1; i <= mbs; i++) {
2944     for (k = ai[i - 1]; k < ai[i]; k++) {
2945       zero = PETSC_TRUE;
2946       ap   = aa + bs2 * k;
2947       for (j = 0; j < bs2 && zero; j++) {
2948         if (ap[j] != 0.0) zero = PETSC_FALSE;
2949       }
2950       if (zero && (aj[k] != i - 1 || !keep)) fshift++;
2951       else {
2952         if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1));
2953         aj[k - fshift] = aj[k];
2954         PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2));
2955       }
2956     }
2957     ai[i - 1] -= fshift_prev;
2958     fshift_prev  = fshift;
2959     ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1];
2960     a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0);
2961     rmax = PetscMax(rmax, ailen[i - 1]);
2962   }
2963   if (fshift) {
2964     if (mbs) {
2965       ai[mbs] -= fshift;
2966       a->nz = ai[mbs];
2967     }
2968     PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz));
2969     A->nonzerostate++;
2970     A->info.nz_unneeded += (PetscReal)fshift;
2971     a->rmax = rmax;
2972     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
2973     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
2974   }
2975   PetscFunctionReturn(PETSC_SUCCESS);
2976 }
2977 
2978 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2979                                        MatGetRow_SeqBAIJ,
2980                                        MatRestoreRow_SeqBAIJ,
2981                                        MatMult_SeqBAIJ_N,
2982                                        /* 4*/ MatMultAdd_SeqBAIJ_N,
2983                                        MatMultTranspose_SeqBAIJ,
2984                                        MatMultTransposeAdd_SeqBAIJ,
2985                                        NULL,
2986                                        NULL,
2987                                        NULL,
2988                                        /* 10*/ NULL,
2989                                        MatLUFactor_SeqBAIJ,
2990                                        NULL,
2991                                        NULL,
2992                                        MatTranspose_SeqBAIJ,
2993                                        /* 15*/ MatGetInfo_SeqBAIJ,
2994                                        MatEqual_SeqBAIJ,
2995                                        MatGetDiagonal_SeqBAIJ,
2996                                        MatDiagonalScale_SeqBAIJ,
2997                                        MatNorm_SeqBAIJ,
2998                                        /* 20*/ NULL,
2999                                        MatAssemblyEnd_SeqBAIJ,
3000                                        MatSetOption_SeqBAIJ,
3001                                        MatZeroEntries_SeqBAIJ,
3002                                        /* 24*/ MatZeroRows_SeqBAIJ,
3003                                        NULL,
3004                                        NULL,
3005                                        NULL,
3006                                        NULL,
3007                                        /* 29*/ MatSetUp_Seq_Hash,
3008                                        NULL,
3009                                        NULL,
3010                                        NULL,
3011                                        NULL,
3012                                        /* 34*/ MatDuplicate_SeqBAIJ,
3013                                        NULL,
3014                                        NULL,
3015                                        MatILUFactor_SeqBAIJ,
3016                                        NULL,
3017                                        /* 39*/ MatAXPY_SeqBAIJ,
3018                                        MatCreateSubMatrices_SeqBAIJ,
3019                                        MatIncreaseOverlap_SeqBAIJ,
3020                                        MatGetValues_SeqBAIJ,
3021                                        MatCopy_SeqBAIJ,
3022                                        /* 44*/ NULL,
3023                                        MatScale_SeqBAIJ,
3024                                        MatShift_SeqBAIJ,
3025                                        NULL,
3026                                        MatZeroRowsColumns_SeqBAIJ,
3027                                        /* 49*/ NULL,
3028                                        MatGetRowIJ_SeqBAIJ,
3029                                        MatRestoreRowIJ_SeqBAIJ,
3030                                        MatGetColumnIJ_SeqBAIJ,
3031                                        MatRestoreColumnIJ_SeqBAIJ,
3032                                        /* 54*/ MatFDColoringCreate_SeqXAIJ,
3033                                        NULL,
3034                                        NULL,
3035                                        NULL,
3036                                        MatSetValuesBlocked_SeqBAIJ,
3037                                        /* 59*/ MatCreateSubMatrix_SeqBAIJ,
3038                                        MatDestroy_SeqBAIJ,
3039                                        MatView_SeqBAIJ,
3040                                        NULL,
3041                                        NULL,
3042                                        /* 64*/ NULL,
3043                                        NULL,
3044                                        NULL,
3045                                        NULL,
3046                                        NULL,
3047                                        /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
3048                                        NULL,
3049                                        MatConvert_Basic,
3050                                        NULL,
3051                                        NULL,
3052                                        /* 74*/ NULL,
3053                                        MatFDColoringApply_BAIJ,
3054                                        NULL,
3055                                        NULL,
3056                                        NULL,
3057                                        /* 79*/ NULL,
3058                                        NULL,
3059                                        NULL,
3060                                        NULL,
3061                                        MatLoad_SeqBAIJ,
3062                                        /* 84*/ NULL,
3063                                        NULL,
3064                                        NULL,
3065                                        NULL,
3066                                        NULL,
3067                                        /* 89*/ NULL,
3068                                        NULL,
3069                                        NULL,
3070                                        NULL,
3071                                        NULL,
3072                                        /* 94*/ NULL,
3073                                        NULL,
3074                                        NULL,
3075                                        NULL,
3076                                        NULL,
3077                                        /* 99*/ NULL,
3078                                        NULL,
3079                                        NULL,
3080                                        MatConjugate_SeqBAIJ,
3081                                        NULL,
3082                                        /*104*/ NULL,
3083                                        MatRealPart_SeqBAIJ,
3084                                        MatImaginaryPart_SeqBAIJ,
3085                                        NULL,
3086                                        NULL,
3087                                        /*109*/ NULL,
3088                                        NULL,
3089                                        NULL,
3090                                        NULL,
3091                                        MatMissingDiagonal_SeqBAIJ,
3092                                        /*114*/ NULL,
3093                                        NULL,
3094                                        NULL,
3095                                        NULL,
3096                                        NULL,
3097                                        /*119*/ NULL,
3098                                        NULL,
3099                                        MatMultHermitianTranspose_SeqBAIJ,
3100                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3101                                        NULL,
3102                                        /*124*/ NULL,
3103                                        MatGetColumnReductions_SeqBAIJ,
3104                                        MatInvertBlockDiagonal_SeqBAIJ,
3105                                        NULL,
3106                                        NULL,
3107                                        /*129*/ NULL,
3108                                        NULL,
3109                                        NULL,
3110                                        NULL,
3111                                        NULL,
3112                                        /*134*/ NULL,
3113                                        NULL,
3114                                        NULL,
3115                                        NULL,
3116                                        NULL,
3117                                        /*139*/ MatSetBlockSizes_Default,
3118                                        NULL,
3119                                        NULL,
3120                                        MatFDColoringSetUp_SeqXAIJ,
3121                                        NULL,
3122                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3123                                        MatDestroySubMatrices_SeqBAIJ,
3124                                        NULL,
3125                                        NULL,
3126                                        NULL,
3127                                        NULL,
3128                                        /*150*/ NULL,
3129                                        MatEliminateZeros_SeqBAIJ};
3130 
3131 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3132 {
3133   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3134   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3135 
3136   PetscFunctionBegin;
3137   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3138 
3139   /* allocate space for values if not already there */
3140   if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
3141 
3142   /* copy values over */
3143   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
3144   PetscFunctionReturn(PETSC_SUCCESS);
3145 }
3146 
3147 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3148 {
3149   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3150   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3151 
3152   PetscFunctionBegin;
3153   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3154   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
3155 
3156   /* copy values over */
3157   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
3158   PetscFunctionReturn(PETSC_SUCCESS);
3159 }
3160 
3161 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3162 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3163 
3164 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz)
3165 {
3166   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
3167   PetscInt     i, mbs, nbs, bs2;
3168   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3169 
3170   PetscFunctionBegin;
3171   if (B->hash_active) {
3172     PetscInt bs;
3173     B->ops[0] = b->cops;
3174     PetscCall(PetscHMapIJVDestroy(&b->ht));
3175     PetscCall(MatGetBlockSize(B, &bs));
3176     if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht));
3177     PetscCall(PetscFree(b->dnz));
3178     PetscCall(PetscFree(b->bdnz));
3179     B->hash_active = PETSC_FALSE;
3180   }
3181   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3182   if (nz == MAT_SKIP_ALLOCATION) {
3183     skipallocation = PETSC_TRUE;
3184     nz             = 0;
3185   }
3186 
3187   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
3188   PetscCall(PetscLayoutSetUp(B->rmap));
3189   PetscCall(PetscLayoutSetUp(B->cmap));
3190   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3191 
3192   B->preallocated = PETSC_TRUE;
3193 
3194   mbs = B->rmap->n / bs;
3195   nbs = B->cmap->n / bs;
3196   bs2 = bs * bs;
3197 
3198   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3199 
3200   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3201   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3202   if (nnz) {
3203     for (i = 0; i < mbs; i++) {
3204       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
3205       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3206     }
3207   }
3208 
3209   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
3210   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3211   PetscOptionsEnd();
3212 
3213   if (!flg) {
3214     switch (bs) {
3215     case 1:
3216       B->ops->mult    = MatMult_SeqBAIJ_1;
3217       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3218       break;
3219     case 2:
3220       B->ops->mult    = MatMult_SeqBAIJ_2;
3221       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3222       break;
3223     case 3:
3224       B->ops->mult    = MatMult_SeqBAIJ_3;
3225       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3226       break;
3227     case 4:
3228       B->ops->mult    = MatMult_SeqBAIJ_4;
3229       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3230       break;
3231     case 5:
3232       B->ops->mult    = MatMult_SeqBAIJ_5;
3233       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3234       break;
3235     case 6:
3236       B->ops->mult    = MatMult_SeqBAIJ_6;
3237       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3238       break;
3239     case 7:
3240       B->ops->mult    = MatMult_SeqBAIJ_7;
3241       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3242       break;
3243     case 9: {
3244       PetscInt version = 1;
3245       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3246       switch (version) {
3247 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3248       case 1:
3249         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
3250         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3251         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3252         break;
3253 #endif
3254       default:
3255         B->ops->mult    = MatMult_SeqBAIJ_N;
3256         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3257         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3258         break;
3259       }
3260       break;
3261     }
3262     case 11:
3263       B->ops->mult    = MatMult_SeqBAIJ_11;
3264       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3265       break;
3266     case 12: {
3267       PetscInt version = 1;
3268       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3269       switch (version) {
3270       case 1:
3271         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
3272         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3273         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3274         break;
3275       case 2:
3276         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
3277         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3278         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3279         break;
3280 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3281       case 3:
3282         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
3283         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3284         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3285         break;
3286 #endif
3287       default:
3288         B->ops->mult    = MatMult_SeqBAIJ_N;
3289         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3290         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3291         break;
3292       }
3293       break;
3294     }
3295     case 15: {
3296       PetscInt version = 1;
3297       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3298       switch (version) {
3299       case 1:
3300         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3301         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3302         break;
3303       case 2:
3304         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3305         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3306         break;
3307       case 3:
3308         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3309         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3310         break;
3311       case 4:
3312         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3313         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3314         break;
3315       default:
3316         B->ops->mult = MatMult_SeqBAIJ_N;
3317         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3318         break;
3319       }
3320       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3321       break;
3322     }
3323     default:
3324       B->ops->mult    = MatMult_SeqBAIJ_N;
3325       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3326       PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3327       break;
3328     }
3329   }
3330   B->ops->sor = MatSOR_SeqBAIJ;
3331   b->mbs      = mbs;
3332   b->nbs      = nbs;
3333   if (!skipallocation) {
3334     if (!b->imax) {
3335       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
3336 
3337       b->free_imax_ilen = PETSC_TRUE;
3338     }
3339     /* b->ilen will count nonzeros in each block row so far. */
3340     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3341     if (!nnz) {
3342       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3343       else if (nz < 0) nz = 1;
3344       nz = PetscMin(nz, nbs);
3345       for (i = 0; i < mbs; i++) b->imax[i] = nz;
3346       PetscCall(PetscIntMultError(nz, mbs, &nz));
3347     } else {
3348       PetscInt64 nz64 = 0;
3349       for (i = 0; i < mbs; i++) {
3350         b->imax[i] = nnz[i];
3351         nz64 += nnz[i];
3352       }
3353       PetscCall(PetscIntCast(nz64, &nz));
3354     }
3355 
3356     /* allocate the matrix space */
3357     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3358     if (B->structure_only) {
3359       PetscCall(PetscMalloc1(nz, &b->j));
3360       PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i));
3361     } else {
3362       PetscInt nzbs2 = 0;
3363       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
3364       PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i));
3365       PetscCall(PetscArrayzero(b->a, nz * bs2));
3366     }
3367     PetscCall(PetscArrayzero(b->j, nz));
3368 
3369     if (B->structure_only) {
3370       b->singlemalloc = PETSC_FALSE;
3371       b->free_a       = PETSC_FALSE;
3372     } else {
3373       b->singlemalloc = PETSC_TRUE;
3374       b->free_a       = PETSC_TRUE;
3375     }
3376     b->free_ij = PETSC_TRUE;
3377 
3378     b->i[0] = 0;
3379     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3380 
3381   } else {
3382     b->free_a  = PETSC_FALSE;
3383     b->free_ij = PETSC_FALSE;
3384   }
3385 
3386   b->bs2              = bs2;
3387   b->mbs              = mbs;
3388   b->nz               = 0;
3389   b->maxnz            = nz;
3390   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3391   B->was_assembled    = PETSC_FALSE;
3392   B->assembled        = PETSC_FALSE;
3393   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3394   PetscFunctionReturn(PETSC_SUCCESS);
3395 }
3396 
3397 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3398 {
3399   PetscInt     i, m, nz, nz_max = 0, *nnz;
3400   PetscScalar *values      = NULL;
3401   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3402 
3403   PetscFunctionBegin;
3404   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
3405   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
3406   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
3407   PetscCall(PetscLayoutSetUp(B->rmap));
3408   PetscCall(PetscLayoutSetUp(B->cmap));
3409   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3410   m = B->rmap->n / bs;
3411 
3412   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
3413   PetscCall(PetscMalloc1(m + 1, &nnz));
3414   for (i = 0; i < m; i++) {
3415     nz = ii[i + 1] - ii[i];
3416     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3417     nz_max = PetscMax(nz_max, nz);
3418     nnz[i] = nz;
3419   }
3420   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
3421   PetscCall(PetscFree(nnz));
3422 
3423   values = (PetscScalar *)V;
3424   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3425   for (i = 0; i < m; i++) {
3426     PetscInt        ncols = ii[i + 1] - ii[i];
3427     const PetscInt *icols = jj + ii[i];
3428     if (bs == 1 || !roworiented) {
3429       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
3430       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
3431     } else {
3432       PetscInt j;
3433       for (j = 0; j < ncols; j++) {
3434         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
3435         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
3436       }
3437     }
3438   }
3439   if (!V) PetscCall(PetscFree(values));
3440   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3441   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3442   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3443   PetscFunctionReturn(PETSC_SUCCESS);
3444 }
3445 
3446 /*@C
3447   MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3448 
3449   Not Collective
3450 
3451   Input Parameter:
3452 . A - a `MATSEQBAIJ` matrix
3453 
3454   Output Parameter:
3455 . array - pointer to the data
3456 
3457   Level: intermediate
3458 
3459 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3460 @*/
3461 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array)
3462 {
3463   PetscFunctionBegin;
3464   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
3465   PetscFunctionReturn(PETSC_SUCCESS);
3466 }
3467 
3468 /*@C
3469   MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3470 
3471   Not Collective
3472 
3473   Input Parameters:
3474 + A     - a `MATSEQBAIJ` matrix
3475 - array - pointer to the data
3476 
3477   Level: intermediate
3478 
3479 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3480 @*/
3481 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array)
3482 {
3483   PetscFunctionBegin;
3484   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
3485   PetscFunctionReturn(PETSC_SUCCESS);
3486 }
3487 
3488 /*MC
3489    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3490    block sparse compressed row format.
3491 
3492    Options Database Keys:
3493 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()`
3494 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3495 
3496    Level: beginner
3497 
3498    Notes:
3499     `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
3500     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
3501 
3502    Run with `-info` to see what version of the matrix-vector product is being used
3503 
3504 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()`
3505 M*/
3506 
3507 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3508 
3509 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3510 {
3511   PetscMPIInt  size;
3512   Mat_SeqBAIJ *b;
3513 
3514   PetscFunctionBegin;
3515   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
3516   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3517 
3518   PetscCall(PetscNew(&b));
3519   B->data   = (void *)b;
3520   B->ops[0] = MatOps_Values;
3521 
3522   b->row          = NULL;
3523   b->col          = NULL;
3524   b->icol         = NULL;
3525   b->reallocs     = 0;
3526   b->saved_values = NULL;
3527 
3528   b->roworiented        = PETSC_TRUE;
3529   b->nonew              = 0;
3530   b->diag               = NULL;
3531   B->spptr              = NULL;
3532   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3533   b->keepnonzeropattern = PETSC_FALSE;
3534 
3535   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
3536   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
3537   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
3538   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
3539   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
3540   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
3541   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
3542   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
3543   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3544   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
3545 #if defined(PETSC_HAVE_HYPRE)
3546   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
3547 #endif
3548   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
3549   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
3550   PetscFunctionReturn(PETSC_SUCCESS);
3551 }
3552 
3553 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3554 {
3555   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3556   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3557 
3558   PetscFunctionBegin;
3559   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
3560   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
3561 
3562   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3563     c->imax           = a->imax;
3564     c->ilen           = a->ilen;
3565     c->free_imax_ilen = PETSC_FALSE;
3566   } else {
3567     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3568     for (i = 0; i < mbs; i++) {
3569       c->imax[i] = a->imax[i];
3570       c->ilen[i] = a->ilen[i];
3571     }
3572     c->free_imax_ilen = PETSC_TRUE;
3573   }
3574 
3575   /* allocate the matrix space */
3576   if (mallocmatspace) {
3577     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3578       PetscCall(PetscCalloc1(bs2 * nz, &c->a));
3579 
3580       c->i            = a->i;
3581       c->j            = a->j;
3582       c->singlemalloc = PETSC_FALSE;
3583       c->free_a       = PETSC_TRUE;
3584       c->free_ij      = PETSC_FALSE;
3585       c->parent       = A;
3586       C->preallocated = PETSC_TRUE;
3587       C->assembled    = PETSC_TRUE;
3588 
3589       PetscCall(PetscObjectReference((PetscObject)A));
3590       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3591       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3592     } else {
3593       PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i));
3594 
3595       c->singlemalloc = PETSC_TRUE;
3596       c->free_a       = PETSC_TRUE;
3597       c->free_ij      = PETSC_TRUE;
3598 
3599       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3600       if (mbs > 0) {
3601         PetscCall(PetscArraycpy(c->j, a->j, nz));
3602         if (cpvalues == MAT_COPY_VALUES) {
3603           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
3604         } else {
3605           PetscCall(PetscArrayzero(c->a, bs2 * nz));
3606         }
3607       }
3608       C->preallocated = PETSC_TRUE;
3609       C->assembled    = PETSC_TRUE;
3610     }
3611   }
3612 
3613   c->roworiented = a->roworiented;
3614   c->nonew       = a->nonew;
3615 
3616   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
3617   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
3618 
3619   c->bs2 = a->bs2;
3620   c->mbs = a->mbs;
3621   c->nbs = a->nbs;
3622 
3623   if (a->diag) {
3624     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3625       c->diag      = a->diag;
3626       c->free_diag = PETSC_FALSE;
3627     } else {
3628       PetscCall(PetscMalloc1(mbs + 1, &c->diag));
3629       for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i];
3630       c->free_diag = PETSC_TRUE;
3631     }
3632   } else c->diag = NULL;
3633 
3634   c->nz         = a->nz;
3635   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3636   c->solve_work = NULL;
3637   c->mult_work  = NULL;
3638   c->sor_workt  = NULL;
3639   c->sor_work   = NULL;
3640 
3641   c->compressedrow.use   = a->compressedrow.use;
3642   c->compressedrow.nrows = a->compressedrow.nrows;
3643   if (a->compressedrow.use) {
3644     i = a->compressedrow.nrows;
3645     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
3646     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
3647     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
3648   } else {
3649     c->compressedrow.use    = PETSC_FALSE;
3650     c->compressedrow.i      = NULL;
3651     c->compressedrow.rindex = NULL;
3652   }
3653   c->nonzerorowcnt = a->nonzerorowcnt;
3654   C->nonzerostate  = A->nonzerostate;
3655 
3656   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
3657   PetscFunctionReturn(PETSC_SUCCESS);
3658 }
3659 
3660 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3661 {
3662   PetscFunctionBegin;
3663   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
3664   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
3665   PetscCall(MatSetType(*B, MATSEQBAIJ));
3666   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
3667   PetscFunctionReturn(PETSC_SUCCESS);
3668 }
3669 
3670 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3671 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3672 {
3673   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3674   PetscInt    *rowidxs, *colidxs;
3675   PetscScalar *matvals;
3676 
3677   PetscFunctionBegin;
3678   PetscCall(PetscViewerSetUp(viewer));
3679 
3680   /* read matrix header */
3681   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3682   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3683   M  = header[1];
3684   N  = header[2];
3685   nz = header[3];
3686   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3687   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3688   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3689 
3690   /* set block sizes from the viewer's .info file */
3691   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3692   /* set local and global sizes if not set already */
3693   if (mat->rmap->n < 0) mat->rmap->n = M;
3694   if (mat->cmap->n < 0) mat->cmap->n = N;
3695   if (mat->rmap->N < 0) mat->rmap->N = M;
3696   if (mat->cmap->N < 0) mat->cmap->N = N;
3697   PetscCall(PetscLayoutSetUp(mat->rmap));
3698   PetscCall(PetscLayoutSetUp(mat->cmap));
3699 
3700   /* check if the matrix sizes are correct */
3701   PetscCall(MatGetSize(mat, &rows, &cols));
3702   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3703   PetscCall(MatGetBlockSize(mat, &bs));
3704   PetscCall(MatGetLocalSize(mat, &m, &n));
3705   mbs = m / bs;
3706   nbs = n / bs;
3707 
3708   /* read in row lengths, column indices and nonzero values */
3709   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3710   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
3711   rowidxs[0] = 0;
3712   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3713   sum = rowidxs[m];
3714   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3715 
3716   /* read in column indices and nonzero values */
3717   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
3718   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
3719   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3720 
3721   {               /* preallocate matrix storage */
3722     PetscBT   bt; /* helper bit set to count nonzeros */
3723     PetscInt *nnz;
3724     PetscBool sbaij;
3725 
3726     PetscCall(PetscBTCreate(nbs, &bt));
3727     PetscCall(PetscCalloc1(mbs, &nnz));
3728     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3729     for (i = 0; i < mbs; i++) {
3730       PetscCall(PetscBTMemzero(nbs, bt));
3731       for (k = 0; k < bs; k++) {
3732         PetscInt row = bs * i + k;
3733         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3734           PetscInt col = colidxs[j];
3735           if (!sbaij || col >= row)
3736             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3737         }
3738       }
3739     }
3740     PetscCall(PetscBTDestroy(&bt));
3741     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
3742     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
3743     PetscCall(PetscFree(nnz));
3744   }
3745 
3746   /* store matrix values */
3747   for (i = 0; i < m; i++) {
3748     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
3749     PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
3750   }
3751 
3752   PetscCall(PetscFree(rowidxs));
3753   PetscCall(PetscFree2(colidxs, matvals));
3754   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3755   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3756   PetscFunctionReturn(PETSC_SUCCESS);
3757 }
3758 
3759 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3760 {
3761   PetscBool isbinary;
3762 
3763   PetscFunctionBegin;
3764   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3765   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3766   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
3767   PetscFunctionReturn(PETSC_SUCCESS);
3768 }
3769 
3770 /*@C
3771   MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3772   compressed row) format.  For good matrix assembly performance the
3773   user should preallocate the matrix storage by setting the parameter `nz`
3774   (or the array `nnz`).
3775 
3776   Collective
3777 
3778   Input Parameters:
3779 + comm - MPI communicator, set to `PETSC_COMM_SELF`
3780 . bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3781           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3782 . m    - number of rows
3783 . n    - number of columns
3784 . nz   - number of nonzero blocks  per block row (same for all rows)
3785 - nnz  - array containing the number of nonzero blocks in the various block rows
3786          (possibly different for each block row) or `NULL`
3787 
3788   Output Parameter:
3789 . A - the matrix
3790 
3791   Options Database Keys:
3792 + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3793 - -mat_block_size - size of the blocks to use
3794 
3795   Level: intermediate
3796 
3797   Notes:
3798   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3799   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3800   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
3801 
3802   The number of rows and columns must be divisible by blocksize.
3803 
3804   If the `nnz` parameter is given then the `nz` parameter is ignored
3805 
3806   A nonzero block is any block that as 1 or more nonzeros in it
3807 
3808   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3809   storage.  That is, the stored row and column indices can begin at
3810   either one (as in Fortran) or zero.
3811 
3812   Specify the preallocated storage with either `nz` or `nnz` (not both).
3813   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3814   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3815   matrices.
3816 
3817 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3818 @*/
3819 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3820 {
3821   PetscFunctionBegin;
3822   PetscCall(MatCreate(comm, A));
3823   PetscCall(MatSetSizes(*A, m, n, m, n));
3824   PetscCall(MatSetType(*A, MATSEQBAIJ));
3825   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
3826   PetscFunctionReturn(PETSC_SUCCESS);
3827 }
3828 
3829 /*@C
3830   MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3831   per row in the matrix. For good matrix assembly performance the
3832   user should preallocate the matrix storage by setting the parameter `nz`
3833   (or the array `nnz`).
3834 
3835   Collective
3836 
3837   Input Parameters:
3838 + B   - the matrix
3839 . bs  - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3840           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3841 . nz  - number of block nonzeros per block row (same for all rows)
3842 - nnz - array containing the number of block nonzeros in the various block rows
3843          (possibly different for each block row) or `NULL`
3844 
3845   Options Database Keys:
3846 + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3847 - -mat_block_size - size of the blocks to use
3848 
3849   Level: intermediate
3850 
3851   Notes:
3852   If the `nnz` parameter is given then the `nz` parameter is ignored
3853 
3854   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3855   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3856   You can also run with the option `-info` and look for messages with the string
3857   malloc in them to see if additional memory allocation was needed.
3858 
3859   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3860   storage.  That is, the stored row and column indices can begin at
3861   either one (as in Fortran) or zero.
3862 
3863   Specify the preallocated storage with either nz or nnz (not both).
3864   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3865   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3866 
3867 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3868 @*/
3869 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3870 {
3871   PetscFunctionBegin;
3872   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3873   PetscValidType(B, 1);
3874   PetscValidLogicalCollectiveInt(B, bs, 2);
3875   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
3876   PetscFunctionReturn(PETSC_SUCCESS);
3877 }
3878 
3879 /*@C
3880   MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3881 
3882   Collective
3883 
3884   Input Parameters:
3885 + B  - the matrix
3886 . bs - the blocksize
3887 . i  - the indices into `j` for the start of each local row (starts with zero)
3888 . j  - the column indices for each local row (starts with zero) these must be sorted for each row
3889 - v  - optional values in the matrix
3890 
3891   Level: advanced
3892 
3893   Notes:
3894   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
3895   may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
3896   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3897   `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3898   block column and the second index is over columns within a block.
3899 
3900   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3901 
3902 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3903 @*/
3904 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3905 {
3906   PetscFunctionBegin;
3907   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3908   PetscValidType(B, 1);
3909   PetscValidLogicalCollectiveInt(B, bs, 2);
3910   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
3911   PetscFunctionReturn(PETSC_SUCCESS);
3912 }
3913 
3914 /*@
3915   MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3916 
3917   Collective
3918 
3919   Input Parameters:
3920 + comm - must be an MPI communicator of size 1
3921 . bs   - size of block
3922 . m    - number of rows
3923 . n    - number of columns
3924 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3925 . j    - column indices
3926 - a    - matrix values
3927 
3928   Output Parameter:
3929 . mat - the matrix
3930 
3931   Level: advanced
3932 
3933   Notes:
3934   The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
3935   once the matrix is destroyed
3936 
3937   You cannot set new nonzero locations into this matrix, that will generate an error.
3938 
3939   The `i` and `j` indices are 0 based
3940 
3941   When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3942 
3943   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3944   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3945   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3946   with column-major ordering within blocks.
3947 
3948 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3949 @*/
3950 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3951 {
3952   Mat_SeqBAIJ *baij;
3953 
3954   PetscFunctionBegin;
3955   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
3956   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3957 
3958   PetscCall(MatCreate(comm, mat));
3959   PetscCall(MatSetSizes(*mat, m, n, m, n));
3960   PetscCall(MatSetType(*mat, MATSEQBAIJ));
3961   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3962   baij = (Mat_SeqBAIJ *)(*mat)->data;
3963   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3964 
3965   baij->i = i;
3966   baij->j = j;
3967   baij->a = a;
3968 
3969   baij->singlemalloc   = PETSC_FALSE;
3970   baij->nonew          = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3971   baij->free_a         = PETSC_FALSE;
3972   baij->free_ij        = PETSC_FALSE;
3973   baij->free_imax_ilen = PETSC_TRUE;
3974 
3975   for (PetscInt ii = 0; ii < m; ii++) {
3976     const PetscInt row_len = i[ii + 1] - i[ii];
3977 
3978     baij->ilen[ii] = baij->imax[ii] = row_len;
3979     PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
3980   }
3981   if (PetscDefined(USE_DEBUG)) {
3982     for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
3983       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3984       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3985     }
3986   }
3987 
3988   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3989   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3990   PetscFunctionReturn(PETSC_SUCCESS);
3991 }
3992 
3993 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3994 {
3995   PetscFunctionBegin;
3996   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
3997   PetscFunctionReturn(PETSC_SUCCESS);
3998 }
3999