xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 2d30e087755efd99e28fdfe792ffbeb2ee1ea928)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
19 
20 PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) {
21   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
22   PetscInt     m, n, i;
23   PetscInt     ib, jb, bs = A->rmap->bs;
24   MatScalar   *a_val = a_aij->a;
25 
26   PetscFunctionBegin;
27   PetscCall(MatGetSize(A, &m, &n));
28   for (i = 0; i < n; i++) reductions[i] = 0.0;
29   if (type == NORM_2) {
30     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
31       for (jb = 0; jb < bs; jb++) {
32         for (ib = 0; ib < bs; ib++) {
33           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
34           a_val++;
35         }
36       }
37     }
38   } else if (type == NORM_1) {
39     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
40       for (jb = 0; jb < bs; jb++) {
41         for (ib = 0; ib < bs; ib++) {
42           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
43           a_val++;
44         }
45       }
46     }
47   } else if (type == NORM_INFINITY) {
48     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
49       for (jb = 0; jb < bs; jb++) {
50         for (ib = 0; ib < bs; ib++) {
51           int col         = A->cmap->rstart + a_aij->j[i] * bs + jb;
52           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
53           a_val++;
54         }
55       }
56     }
57   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
58     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
59       for (jb = 0; jb < bs; jb++) {
60         for (ib = 0; ib < bs; ib++) {
61           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
62           a_val++;
63         }
64       }
65     }
66   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
67     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
68       for (jb = 0; jb < bs; jb++) {
69         for (ib = 0; ib < bs; ib++) {
70           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
71           a_val++;
72         }
73       }
74     }
75   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
76   if (type == NORM_2) {
77     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
78   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
79     for (i = 0; i < n; i++) reductions[i] /= m;
80   }
81   PetscFunctionReturn(0);
82 }
83 
84 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) {
85   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
86   PetscInt    *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
87   MatScalar   *v     = a->a, *odiag, *diag, work[25], *v_work;
88   PetscReal    shift = 0.0;
89   PetscBool    allowzeropivot, zeropivotdetected = PETSC_FALSE;
90 
91   PetscFunctionBegin;
92   allowzeropivot = PetscNot(A->erroriffailure);
93 
94   if (a->idiagvalid) {
95     if (values) *values = a->idiag;
96     PetscFunctionReturn(0);
97   }
98   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
99   diag_offset = a->diag;
100   if (!a->idiag) {
101     PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag));
102     PetscCall(PetscLogObjectMemory((PetscObject)A, bs2 * mbs * sizeof(PetscScalar)));
103   }
104   diag = a->idiag;
105   if (values) *values = a->idiag;
106   /* factor and invert each block */
107   switch (bs) {
108   case 1:
109     for (i = 0; i < mbs; i++) {
110       odiag   = v + 1 * diag_offset[i];
111       diag[0] = odiag[0];
112 
113       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
114         if (allowzeropivot) {
115           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
116           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
117           A->factorerror_zeropivot_row   = i;
118           PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
119         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
120       }
121 
122       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
123       diag += 1;
124     }
125     break;
126   case 2:
127     for (i = 0; i < mbs; i++) {
128       odiag   = v + 4 * diag_offset[i];
129       diag[0] = odiag[0];
130       diag[1] = odiag[1];
131       diag[2] = odiag[2];
132       diag[3] = odiag[3];
133       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
134       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
135       diag += 4;
136     }
137     break;
138   case 3:
139     for (i = 0; i < mbs; i++) {
140       odiag   = v + 9 * diag_offset[i];
141       diag[0] = odiag[0];
142       diag[1] = odiag[1];
143       diag[2] = odiag[2];
144       diag[3] = odiag[3];
145       diag[4] = odiag[4];
146       diag[5] = odiag[5];
147       diag[6] = odiag[6];
148       diag[7] = odiag[7];
149       diag[8] = odiag[8];
150       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
151       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
152       diag += 9;
153     }
154     break;
155   case 4:
156     for (i = 0; i < mbs; i++) {
157       odiag = v + 16 * diag_offset[i];
158       PetscCall(PetscArraycpy(diag, odiag, 16));
159       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
160       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
161       diag += 16;
162     }
163     break;
164   case 5:
165     for (i = 0; i < mbs; i++) {
166       odiag = v + 25 * diag_offset[i];
167       PetscCall(PetscArraycpy(diag, odiag, 25));
168       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
169       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
170       diag += 25;
171     }
172     break;
173   case 6:
174     for (i = 0; i < mbs; i++) {
175       odiag = v + 36 * diag_offset[i];
176       PetscCall(PetscArraycpy(diag, odiag, 36));
177       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
178       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
179       diag += 36;
180     }
181     break;
182   case 7:
183     for (i = 0; i < mbs; i++) {
184       odiag = v + 49 * diag_offset[i];
185       PetscCall(PetscArraycpy(diag, odiag, 49));
186       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
187       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
188       diag += 49;
189     }
190     break;
191   default:
192     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
193     for (i = 0; i < mbs; i++) {
194       odiag = v + bs2 * diag_offset[i];
195       PetscCall(PetscArraycpy(diag, odiag, bs2));
196       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
197       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
198       diag += bs2;
199     }
200     PetscCall(PetscFree2(v_work, v_pivots));
201   }
202   a->idiagvalid = PETSC_TRUE;
203   PetscFunctionReturn(0);
204 }
205 
206 PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
207   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
208   PetscScalar       *x, *work, *w, *workt, *t;
209   const MatScalar   *v, *aa = a->a, *idiag;
210   const PetscScalar *b, *xb;
211   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
212   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
213   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
214 
215   PetscFunctionBegin;
216   its = its * lits;
217   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
218   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
219   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
220   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
221   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
222 
223   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
224 
225   if (!m) PetscFunctionReturn(0);
226   diag  = a->diag;
227   idiag = a->idiag;
228   k     = PetscMax(A->rmap->n, A->cmap->n);
229   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
230   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
231   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
232   work = a->mult_work;
233   t    = a->sor_workt;
234   w    = a->sor_work;
235 
236   PetscCall(VecGetArray(xx, &x));
237   PetscCall(VecGetArrayRead(bb, &b));
238 
239   if (flag & SOR_ZERO_INITIAL_GUESS) {
240     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
241       switch (bs) {
242       case 1:
243         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
244         t[0] = b[0];
245         i2   = 1;
246         idiag += 1;
247         for (i = 1; i < m; i++) {
248           v    = aa + ai[i];
249           vi   = aj + ai[i];
250           nz   = diag[i] - ai[i];
251           s[0] = b[i2];
252           for (j = 0; j < nz; j++) {
253             xw[0] = x[vi[j]];
254             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
255           }
256           t[i2] = s[0];
257           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
258           x[i2] = xw[0];
259           idiag += 1;
260           i2 += 1;
261         }
262         break;
263       case 2:
264         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
265         t[0] = b[0];
266         t[1] = b[1];
267         i2   = 2;
268         idiag += 4;
269         for (i = 1; i < m; i++) {
270           v    = aa + 4 * ai[i];
271           vi   = aj + ai[i];
272           nz   = diag[i] - ai[i];
273           s[0] = b[i2];
274           s[1] = b[i2 + 1];
275           for (j = 0; j < nz; j++) {
276             idx   = 2 * vi[j];
277             it    = 4 * j;
278             xw[0] = x[idx];
279             xw[1] = x[1 + idx];
280             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
281           }
282           t[i2]     = s[0];
283           t[i2 + 1] = s[1];
284           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
285           x[i2]     = xw[0];
286           x[i2 + 1] = xw[1];
287           idiag += 4;
288           i2 += 2;
289         }
290         break;
291       case 3:
292         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
293         t[0] = b[0];
294         t[1] = b[1];
295         t[2] = b[2];
296         i2   = 3;
297         idiag += 9;
298         for (i = 1; i < m; i++) {
299           v    = aa + 9 * ai[i];
300           vi   = aj + ai[i];
301           nz   = diag[i] - ai[i];
302           s[0] = b[i2];
303           s[1] = b[i2 + 1];
304           s[2] = b[i2 + 2];
305           while (nz--) {
306             idx   = 3 * (*vi++);
307             xw[0] = x[idx];
308             xw[1] = x[1 + idx];
309             xw[2] = x[2 + idx];
310             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
311             v += 9;
312           }
313           t[i2]     = s[0];
314           t[i2 + 1] = s[1];
315           t[i2 + 2] = s[2];
316           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
317           x[i2]     = xw[0];
318           x[i2 + 1] = xw[1];
319           x[i2 + 2] = xw[2];
320           idiag += 9;
321           i2 += 3;
322         }
323         break;
324       case 4:
325         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
326         t[0] = b[0];
327         t[1] = b[1];
328         t[2] = b[2];
329         t[3] = b[3];
330         i2   = 4;
331         idiag += 16;
332         for (i = 1; i < m; i++) {
333           v    = aa + 16 * ai[i];
334           vi   = aj + ai[i];
335           nz   = diag[i] - ai[i];
336           s[0] = b[i2];
337           s[1] = b[i2 + 1];
338           s[2] = b[i2 + 2];
339           s[3] = b[i2 + 3];
340           while (nz--) {
341             idx   = 4 * (*vi++);
342             xw[0] = x[idx];
343             xw[1] = x[1 + idx];
344             xw[2] = x[2 + idx];
345             xw[3] = x[3 + idx];
346             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
347             v += 16;
348           }
349           t[i2]     = s[0];
350           t[i2 + 1] = s[1];
351           t[i2 + 2] = s[2];
352           t[i2 + 3] = s[3];
353           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
354           x[i2]     = xw[0];
355           x[i2 + 1] = xw[1];
356           x[i2 + 2] = xw[2];
357           x[i2 + 3] = xw[3];
358           idiag += 16;
359           i2 += 4;
360         }
361         break;
362       case 5:
363         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
364         t[0] = b[0];
365         t[1] = b[1];
366         t[2] = b[2];
367         t[3] = b[3];
368         t[4] = b[4];
369         i2   = 5;
370         idiag += 25;
371         for (i = 1; i < m; i++) {
372           v    = aa + 25 * ai[i];
373           vi   = aj + ai[i];
374           nz   = diag[i] - ai[i];
375           s[0] = b[i2];
376           s[1] = b[i2 + 1];
377           s[2] = b[i2 + 2];
378           s[3] = b[i2 + 3];
379           s[4] = b[i2 + 4];
380           while (nz--) {
381             idx   = 5 * (*vi++);
382             xw[0] = x[idx];
383             xw[1] = x[1 + idx];
384             xw[2] = x[2 + idx];
385             xw[3] = x[3 + idx];
386             xw[4] = x[4 + idx];
387             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
388             v += 25;
389           }
390           t[i2]     = s[0];
391           t[i2 + 1] = s[1];
392           t[i2 + 2] = s[2];
393           t[i2 + 3] = s[3];
394           t[i2 + 4] = s[4];
395           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
396           x[i2]     = xw[0];
397           x[i2 + 1] = xw[1];
398           x[i2 + 2] = xw[2];
399           x[i2 + 3] = xw[3];
400           x[i2 + 4] = xw[4];
401           idiag += 25;
402           i2 += 5;
403         }
404         break;
405       case 6:
406         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
407         t[0] = b[0];
408         t[1] = b[1];
409         t[2] = b[2];
410         t[3] = b[3];
411         t[4] = b[4];
412         t[5] = b[5];
413         i2   = 6;
414         idiag += 36;
415         for (i = 1; i < m; i++) {
416           v    = aa + 36 * ai[i];
417           vi   = aj + ai[i];
418           nz   = diag[i] - ai[i];
419           s[0] = b[i2];
420           s[1] = b[i2 + 1];
421           s[2] = b[i2 + 2];
422           s[3] = b[i2 + 3];
423           s[4] = b[i2 + 4];
424           s[5] = b[i2 + 5];
425           while (nz--) {
426             idx   = 6 * (*vi++);
427             xw[0] = x[idx];
428             xw[1] = x[1 + idx];
429             xw[2] = x[2 + idx];
430             xw[3] = x[3 + idx];
431             xw[4] = x[4 + idx];
432             xw[5] = x[5 + idx];
433             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
434             v += 36;
435           }
436           t[i2]     = s[0];
437           t[i2 + 1] = s[1];
438           t[i2 + 2] = s[2];
439           t[i2 + 3] = s[3];
440           t[i2 + 4] = s[4];
441           t[i2 + 5] = s[5];
442           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
443           x[i2]     = xw[0];
444           x[i2 + 1] = xw[1];
445           x[i2 + 2] = xw[2];
446           x[i2 + 3] = xw[3];
447           x[i2 + 4] = xw[4];
448           x[i2 + 5] = xw[5];
449           idiag += 36;
450           i2 += 6;
451         }
452         break;
453       case 7:
454         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
455         t[0] = b[0];
456         t[1] = b[1];
457         t[2] = b[2];
458         t[3] = b[3];
459         t[4] = b[4];
460         t[5] = b[5];
461         t[6] = b[6];
462         i2   = 7;
463         idiag += 49;
464         for (i = 1; i < m; i++) {
465           v    = aa + 49 * ai[i];
466           vi   = aj + ai[i];
467           nz   = diag[i] - ai[i];
468           s[0] = b[i2];
469           s[1] = b[i2 + 1];
470           s[2] = b[i2 + 2];
471           s[3] = b[i2 + 3];
472           s[4] = b[i2 + 4];
473           s[5] = b[i2 + 5];
474           s[6] = b[i2 + 6];
475           while (nz--) {
476             idx   = 7 * (*vi++);
477             xw[0] = x[idx];
478             xw[1] = x[1 + idx];
479             xw[2] = x[2 + idx];
480             xw[3] = x[3 + idx];
481             xw[4] = x[4 + idx];
482             xw[5] = x[5 + idx];
483             xw[6] = x[6 + idx];
484             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
485             v += 49;
486           }
487           t[i2]     = s[0];
488           t[i2 + 1] = s[1];
489           t[i2 + 2] = s[2];
490           t[i2 + 3] = s[3];
491           t[i2 + 4] = s[4];
492           t[i2 + 5] = s[5];
493           t[i2 + 6] = s[6];
494           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
495           x[i2]     = xw[0];
496           x[i2 + 1] = xw[1];
497           x[i2 + 2] = xw[2];
498           x[i2 + 3] = xw[3];
499           x[i2 + 4] = xw[4];
500           x[i2 + 5] = xw[5];
501           x[i2 + 6] = xw[6];
502           idiag += 49;
503           i2 += 7;
504         }
505         break;
506       default:
507         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
508         PetscCall(PetscArraycpy(t, b, bs));
509         i2 = bs;
510         idiag += bs2;
511         for (i = 1; i < m; i++) {
512           v  = aa + bs2 * ai[i];
513           vi = aj + ai[i];
514           nz = diag[i] - ai[i];
515 
516           PetscCall(PetscArraycpy(w, b + i2, bs));
517           /* copy all rows of x that are needed into contiguous space */
518           workt = work;
519           for (j = 0; j < nz; j++) {
520             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
521             workt += bs;
522           }
523           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
524           PetscCall(PetscArraycpy(t + i2, w, bs));
525           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
526 
527           idiag += bs2;
528           i2 += bs;
529         }
530         break;
531       }
532       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
533       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
534       xb = t;
535     } else xb = b;
536     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
537       idiag = a->idiag + bs2 * (a->mbs - 1);
538       i2    = bs * (m - 1);
539       switch (bs) {
540       case 1:
541         s[0] = xb[i2];
542         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
543         x[i2] = xw[0];
544         i2 -= 1;
545         for (i = m - 2; i >= 0; i--) {
546           v    = aa + (diag[i] + 1);
547           vi   = aj + diag[i] + 1;
548           nz   = ai[i + 1] - diag[i] - 1;
549           s[0] = xb[i2];
550           for (j = 0; j < nz; j++) {
551             xw[0] = x[vi[j]];
552             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
553           }
554           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
555           x[i2] = xw[0];
556           idiag -= 1;
557           i2 -= 1;
558         }
559         break;
560       case 2:
561         s[0] = xb[i2];
562         s[1] = xb[i2 + 1];
563         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
564         x[i2]     = xw[0];
565         x[i2 + 1] = xw[1];
566         i2 -= 2;
567         idiag -= 4;
568         for (i = m - 2; i >= 0; i--) {
569           v    = aa + 4 * (diag[i] + 1);
570           vi   = aj + diag[i] + 1;
571           nz   = ai[i + 1] - diag[i] - 1;
572           s[0] = xb[i2];
573           s[1] = xb[i2 + 1];
574           for (j = 0; j < nz; j++) {
575             idx   = 2 * vi[j];
576             it    = 4 * j;
577             xw[0] = x[idx];
578             xw[1] = x[1 + idx];
579             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
580           }
581           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
582           x[i2]     = xw[0];
583           x[i2 + 1] = xw[1];
584           idiag -= 4;
585           i2 -= 2;
586         }
587         break;
588       case 3:
589         s[0] = xb[i2];
590         s[1] = xb[i2 + 1];
591         s[2] = xb[i2 + 2];
592         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
593         x[i2]     = xw[0];
594         x[i2 + 1] = xw[1];
595         x[i2 + 2] = xw[2];
596         i2 -= 3;
597         idiag -= 9;
598         for (i = m - 2; i >= 0; i--) {
599           v    = aa + 9 * (diag[i] + 1);
600           vi   = aj + diag[i] + 1;
601           nz   = ai[i + 1] - diag[i] - 1;
602           s[0] = xb[i2];
603           s[1] = xb[i2 + 1];
604           s[2] = xb[i2 + 2];
605           while (nz--) {
606             idx   = 3 * (*vi++);
607             xw[0] = x[idx];
608             xw[1] = x[1 + idx];
609             xw[2] = x[2 + idx];
610             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
611             v += 9;
612           }
613           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
614           x[i2]     = xw[0];
615           x[i2 + 1] = xw[1];
616           x[i2 + 2] = xw[2];
617           idiag -= 9;
618           i2 -= 3;
619         }
620         break;
621       case 4:
622         s[0] = xb[i2];
623         s[1] = xb[i2 + 1];
624         s[2] = xb[i2 + 2];
625         s[3] = xb[i2 + 3];
626         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
627         x[i2]     = xw[0];
628         x[i2 + 1] = xw[1];
629         x[i2 + 2] = xw[2];
630         x[i2 + 3] = xw[3];
631         i2 -= 4;
632         idiag -= 16;
633         for (i = m - 2; i >= 0; i--) {
634           v    = aa + 16 * (diag[i] + 1);
635           vi   = aj + diag[i] + 1;
636           nz   = ai[i + 1] - diag[i] - 1;
637           s[0] = xb[i2];
638           s[1] = xb[i2 + 1];
639           s[2] = xb[i2 + 2];
640           s[3] = xb[i2 + 3];
641           while (nz--) {
642             idx   = 4 * (*vi++);
643             xw[0] = x[idx];
644             xw[1] = x[1 + idx];
645             xw[2] = x[2 + idx];
646             xw[3] = x[3 + idx];
647             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
648             v += 16;
649           }
650           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
651           x[i2]     = xw[0];
652           x[i2 + 1] = xw[1];
653           x[i2 + 2] = xw[2];
654           x[i2 + 3] = xw[3];
655           idiag -= 16;
656           i2 -= 4;
657         }
658         break;
659       case 5:
660         s[0] = xb[i2];
661         s[1] = xb[i2 + 1];
662         s[2] = xb[i2 + 2];
663         s[3] = xb[i2 + 3];
664         s[4] = xb[i2 + 4];
665         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
666         x[i2]     = xw[0];
667         x[i2 + 1] = xw[1];
668         x[i2 + 2] = xw[2];
669         x[i2 + 3] = xw[3];
670         x[i2 + 4] = xw[4];
671         i2 -= 5;
672         idiag -= 25;
673         for (i = m - 2; i >= 0; i--) {
674           v    = aa + 25 * (diag[i] + 1);
675           vi   = aj + diag[i] + 1;
676           nz   = ai[i + 1] - diag[i] - 1;
677           s[0] = xb[i2];
678           s[1] = xb[i2 + 1];
679           s[2] = xb[i2 + 2];
680           s[3] = xb[i2 + 3];
681           s[4] = xb[i2 + 4];
682           while (nz--) {
683             idx   = 5 * (*vi++);
684             xw[0] = x[idx];
685             xw[1] = x[1 + idx];
686             xw[2] = x[2 + idx];
687             xw[3] = x[3 + idx];
688             xw[4] = x[4 + idx];
689             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
690             v += 25;
691           }
692           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
693           x[i2]     = xw[0];
694           x[i2 + 1] = xw[1];
695           x[i2 + 2] = xw[2];
696           x[i2 + 3] = xw[3];
697           x[i2 + 4] = xw[4];
698           idiag -= 25;
699           i2 -= 5;
700         }
701         break;
702       case 6:
703         s[0] = xb[i2];
704         s[1] = xb[i2 + 1];
705         s[2] = xb[i2 + 2];
706         s[3] = xb[i2 + 3];
707         s[4] = xb[i2 + 4];
708         s[5] = xb[i2 + 5];
709         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
710         x[i2]     = xw[0];
711         x[i2 + 1] = xw[1];
712         x[i2 + 2] = xw[2];
713         x[i2 + 3] = xw[3];
714         x[i2 + 4] = xw[4];
715         x[i2 + 5] = xw[5];
716         i2 -= 6;
717         idiag -= 36;
718         for (i = m - 2; i >= 0; i--) {
719           v    = aa + 36 * (diag[i] + 1);
720           vi   = aj + diag[i] + 1;
721           nz   = ai[i + 1] - diag[i] - 1;
722           s[0] = xb[i2];
723           s[1] = xb[i2 + 1];
724           s[2] = xb[i2 + 2];
725           s[3] = xb[i2 + 3];
726           s[4] = xb[i2 + 4];
727           s[5] = xb[i2 + 5];
728           while (nz--) {
729             idx   = 6 * (*vi++);
730             xw[0] = x[idx];
731             xw[1] = x[1 + idx];
732             xw[2] = x[2 + idx];
733             xw[3] = x[3 + idx];
734             xw[4] = x[4 + idx];
735             xw[5] = x[5 + idx];
736             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
737             v += 36;
738           }
739           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
740           x[i2]     = xw[0];
741           x[i2 + 1] = xw[1];
742           x[i2 + 2] = xw[2];
743           x[i2 + 3] = xw[3];
744           x[i2 + 4] = xw[4];
745           x[i2 + 5] = xw[5];
746           idiag -= 36;
747           i2 -= 6;
748         }
749         break;
750       case 7:
751         s[0] = xb[i2];
752         s[1] = xb[i2 + 1];
753         s[2] = xb[i2 + 2];
754         s[3] = xb[i2 + 3];
755         s[4] = xb[i2 + 4];
756         s[5] = xb[i2 + 5];
757         s[6] = xb[i2 + 6];
758         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
759         x[i2]     = xw[0];
760         x[i2 + 1] = xw[1];
761         x[i2 + 2] = xw[2];
762         x[i2 + 3] = xw[3];
763         x[i2 + 4] = xw[4];
764         x[i2 + 5] = xw[5];
765         x[i2 + 6] = xw[6];
766         i2 -= 7;
767         idiag -= 49;
768         for (i = m - 2; i >= 0; i--) {
769           v    = aa + 49 * (diag[i] + 1);
770           vi   = aj + diag[i] + 1;
771           nz   = ai[i + 1] - diag[i] - 1;
772           s[0] = xb[i2];
773           s[1] = xb[i2 + 1];
774           s[2] = xb[i2 + 2];
775           s[3] = xb[i2 + 3];
776           s[4] = xb[i2 + 4];
777           s[5] = xb[i2 + 5];
778           s[6] = xb[i2 + 6];
779           while (nz--) {
780             idx   = 7 * (*vi++);
781             xw[0] = x[idx];
782             xw[1] = x[1 + idx];
783             xw[2] = x[2 + idx];
784             xw[3] = x[3 + idx];
785             xw[4] = x[4 + idx];
786             xw[5] = x[5 + idx];
787             xw[6] = x[6 + idx];
788             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
789             v += 49;
790           }
791           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
792           x[i2]     = xw[0];
793           x[i2 + 1] = xw[1];
794           x[i2 + 2] = xw[2];
795           x[i2 + 3] = xw[3];
796           x[i2 + 4] = xw[4];
797           x[i2 + 5] = xw[5];
798           x[i2 + 6] = xw[6];
799           idiag -= 49;
800           i2 -= 7;
801         }
802         break;
803       default:
804         PetscCall(PetscArraycpy(w, xb + i2, bs));
805         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
806         i2 -= bs;
807         idiag -= bs2;
808         for (i = m - 2; i >= 0; i--) {
809           v  = aa + bs2 * (diag[i] + 1);
810           vi = aj + diag[i] + 1;
811           nz = ai[i + 1] - diag[i] - 1;
812 
813           PetscCall(PetscArraycpy(w, xb + i2, bs));
814           /* copy all rows of x that are needed into contiguous space */
815           workt = work;
816           for (j = 0; j < nz; j++) {
817             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
818             workt += bs;
819           }
820           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
821           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
822 
823           idiag -= bs2;
824           i2 -= bs;
825         }
826         break;
827       }
828       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
829     }
830     its--;
831   }
832   while (its--) {
833     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
834       idiag = a->idiag;
835       i2    = 0;
836       switch (bs) {
837       case 1:
838         for (i = 0; i < m; i++) {
839           v    = aa + ai[i];
840           vi   = aj + ai[i];
841           nz   = ai[i + 1] - ai[i];
842           s[0] = b[i2];
843           for (j = 0; j < nz; j++) {
844             xw[0] = x[vi[j]];
845             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
846           }
847           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
848           x[i2] += xw[0];
849           idiag += 1;
850           i2 += 1;
851         }
852         break;
853       case 2:
854         for (i = 0; i < m; i++) {
855           v    = aa + 4 * ai[i];
856           vi   = aj + ai[i];
857           nz   = ai[i + 1] - ai[i];
858           s[0] = b[i2];
859           s[1] = b[i2 + 1];
860           for (j = 0; j < nz; j++) {
861             idx   = 2 * vi[j];
862             it    = 4 * j;
863             xw[0] = x[idx];
864             xw[1] = x[1 + idx];
865             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
866           }
867           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
868           x[i2] += xw[0];
869           x[i2 + 1] += xw[1];
870           idiag += 4;
871           i2 += 2;
872         }
873         break;
874       case 3:
875         for (i = 0; i < m; i++) {
876           v    = aa + 9 * ai[i];
877           vi   = aj + ai[i];
878           nz   = ai[i + 1] - ai[i];
879           s[0] = b[i2];
880           s[1] = b[i2 + 1];
881           s[2] = b[i2 + 2];
882           while (nz--) {
883             idx   = 3 * (*vi++);
884             xw[0] = x[idx];
885             xw[1] = x[1 + idx];
886             xw[2] = x[2 + idx];
887             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
888             v += 9;
889           }
890           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
891           x[i2] += xw[0];
892           x[i2 + 1] += xw[1];
893           x[i2 + 2] += xw[2];
894           idiag += 9;
895           i2 += 3;
896         }
897         break;
898       case 4:
899         for (i = 0; i < m; i++) {
900           v    = aa + 16 * ai[i];
901           vi   = aj + ai[i];
902           nz   = ai[i + 1] - ai[i];
903           s[0] = b[i2];
904           s[1] = b[i2 + 1];
905           s[2] = b[i2 + 2];
906           s[3] = b[i2 + 3];
907           while (nz--) {
908             idx   = 4 * (*vi++);
909             xw[0] = x[idx];
910             xw[1] = x[1 + idx];
911             xw[2] = x[2 + idx];
912             xw[3] = x[3 + idx];
913             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
914             v += 16;
915           }
916           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
917           x[i2] += xw[0];
918           x[i2 + 1] += xw[1];
919           x[i2 + 2] += xw[2];
920           x[i2 + 3] += xw[3];
921           idiag += 16;
922           i2 += 4;
923         }
924         break;
925       case 5:
926         for (i = 0; i < m; i++) {
927           v    = aa + 25 * ai[i];
928           vi   = aj + ai[i];
929           nz   = ai[i + 1] - ai[i];
930           s[0] = b[i2];
931           s[1] = b[i2 + 1];
932           s[2] = b[i2 + 2];
933           s[3] = b[i2 + 3];
934           s[4] = b[i2 + 4];
935           while (nz--) {
936             idx   = 5 * (*vi++);
937             xw[0] = x[idx];
938             xw[1] = x[1 + idx];
939             xw[2] = x[2 + idx];
940             xw[3] = x[3 + idx];
941             xw[4] = x[4 + idx];
942             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
943             v += 25;
944           }
945           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
946           x[i2] += xw[0];
947           x[i2 + 1] += xw[1];
948           x[i2 + 2] += xw[2];
949           x[i2 + 3] += xw[3];
950           x[i2 + 4] += xw[4];
951           idiag += 25;
952           i2 += 5;
953         }
954         break;
955       case 6:
956         for (i = 0; i < m; i++) {
957           v    = aa + 36 * ai[i];
958           vi   = aj + ai[i];
959           nz   = ai[i + 1] - ai[i];
960           s[0] = b[i2];
961           s[1] = b[i2 + 1];
962           s[2] = b[i2 + 2];
963           s[3] = b[i2 + 3];
964           s[4] = b[i2 + 4];
965           s[5] = b[i2 + 5];
966           while (nz--) {
967             idx   = 6 * (*vi++);
968             xw[0] = x[idx];
969             xw[1] = x[1 + idx];
970             xw[2] = x[2 + idx];
971             xw[3] = x[3 + idx];
972             xw[4] = x[4 + idx];
973             xw[5] = x[5 + idx];
974             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
975             v += 36;
976           }
977           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
978           x[i2] += xw[0];
979           x[i2 + 1] += xw[1];
980           x[i2 + 2] += xw[2];
981           x[i2 + 3] += xw[3];
982           x[i2 + 4] += xw[4];
983           x[i2 + 5] += xw[5];
984           idiag += 36;
985           i2 += 6;
986         }
987         break;
988       case 7:
989         for (i = 0; i < m; i++) {
990           v    = aa + 49 * ai[i];
991           vi   = aj + ai[i];
992           nz   = ai[i + 1] - ai[i];
993           s[0] = b[i2];
994           s[1] = b[i2 + 1];
995           s[2] = b[i2 + 2];
996           s[3] = b[i2 + 3];
997           s[4] = b[i2 + 4];
998           s[5] = b[i2 + 5];
999           s[6] = b[i2 + 6];
1000           while (nz--) {
1001             idx   = 7 * (*vi++);
1002             xw[0] = x[idx];
1003             xw[1] = x[1 + idx];
1004             xw[2] = x[2 + idx];
1005             xw[3] = x[3 + idx];
1006             xw[4] = x[4 + idx];
1007             xw[5] = x[5 + idx];
1008             xw[6] = x[6 + idx];
1009             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1010             v += 49;
1011           }
1012           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1013           x[i2] += xw[0];
1014           x[i2 + 1] += xw[1];
1015           x[i2 + 2] += xw[2];
1016           x[i2 + 3] += xw[3];
1017           x[i2 + 4] += xw[4];
1018           x[i2 + 5] += xw[5];
1019           x[i2 + 6] += xw[6];
1020           idiag += 49;
1021           i2 += 7;
1022         }
1023         break;
1024       default:
1025         for (i = 0; i < m; i++) {
1026           v  = aa + bs2 * ai[i];
1027           vi = aj + ai[i];
1028           nz = ai[i + 1] - ai[i];
1029 
1030           PetscCall(PetscArraycpy(w, b + i2, bs));
1031           /* copy all rows of x that are needed into contiguous space */
1032           workt = work;
1033           for (j = 0; j < nz; j++) {
1034             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1035             workt += bs;
1036           }
1037           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1038           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1039 
1040           idiag += bs2;
1041           i2 += bs;
1042         }
1043         break;
1044       }
1045       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1046     }
1047     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1048       idiag = a->idiag + bs2 * (a->mbs - 1);
1049       i2    = bs * (m - 1);
1050       switch (bs) {
1051       case 1:
1052         for (i = m - 1; i >= 0; i--) {
1053           v    = aa + ai[i];
1054           vi   = aj + ai[i];
1055           nz   = ai[i + 1] - ai[i];
1056           s[0] = b[i2];
1057           for (j = 0; j < nz; j++) {
1058             xw[0] = x[vi[j]];
1059             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1060           }
1061           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1062           x[i2] += xw[0];
1063           idiag -= 1;
1064           i2 -= 1;
1065         }
1066         break;
1067       case 2:
1068         for (i = m - 1; i >= 0; i--) {
1069           v    = aa + 4 * ai[i];
1070           vi   = aj + ai[i];
1071           nz   = ai[i + 1] - ai[i];
1072           s[0] = b[i2];
1073           s[1] = b[i2 + 1];
1074           for (j = 0; j < nz; j++) {
1075             idx   = 2 * vi[j];
1076             it    = 4 * j;
1077             xw[0] = x[idx];
1078             xw[1] = x[1 + idx];
1079             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1080           }
1081           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
1082           x[i2] += xw[0];
1083           x[i2 + 1] += xw[1];
1084           idiag -= 4;
1085           i2 -= 2;
1086         }
1087         break;
1088       case 3:
1089         for (i = m - 1; i >= 0; i--) {
1090           v    = aa + 9 * ai[i];
1091           vi   = aj + ai[i];
1092           nz   = ai[i + 1] - ai[i];
1093           s[0] = b[i2];
1094           s[1] = b[i2 + 1];
1095           s[2] = b[i2 + 2];
1096           while (nz--) {
1097             idx   = 3 * (*vi++);
1098             xw[0] = x[idx];
1099             xw[1] = x[1 + idx];
1100             xw[2] = x[2 + idx];
1101             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1102             v += 9;
1103           }
1104           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
1105           x[i2] += xw[0];
1106           x[i2 + 1] += xw[1];
1107           x[i2 + 2] += xw[2];
1108           idiag -= 9;
1109           i2 -= 3;
1110         }
1111         break;
1112       case 4:
1113         for (i = m - 1; i >= 0; i--) {
1114           v    = aa + 16 * ai[i];
1115           vi   = aj + ai[i];
1116           nz   = ai[i + 1] - ai[i];
1117           s[0] = b[i2];
1118           s[1] = b[i2 + 1];
1119           s[2] = b[i2 + 2];
1120           s[3] = b[i2 + 3];
1121           while (nz--) {
1122             idx   = 4 * (*vi++);
1123             xw[0] = x[idx];
1124             xw[1] = x[1 + idx];
1125             xw[2] = x[2 + idx];
1126             xw[3] = x[3 + idx];
1127             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1128             v += 16;
1129           }
1130           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
1131           x[i2] += xw[0];
1132           x[i2 + 1] += xw[1];
1133           x[i2 + 2] += xw[2];
1134           x[i2 + 3] += xw[3];
1135           idiag -= 16;
1136           i2 -= 4;
1137         }
1138         break;
1139       case 5:
1140         for (i = m - 1; i >= 0; i--) {
1141           v    = aa + 25 * ai[i];
1142           vi   = aj + ai[i];
1143           nz   = ai[i + 1] - ai[i];
1144           s[0] = b[i2];
1145           s[1] = b[i2 + 1];
1146           s[2] = b[i2 + 2];
1147           s[3] = b[i2 + 3];
1148           s[4] = b[i2 + 4];
1149           while (nz--) {
1150             idx   = 5 * (*vi++);
1151             xw[0] = x[idx];
1152             xw[1] = x[1 + idx];
1153             xw[2] = x[2 + idx];
1154             xw[3] = x[3 + idx];
1155             xw[4] = x[4 + idx];
1156             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1157             v += 25;
1158           }
1159           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
1160           x[i2] += xw[0];
1161           x[i2 + 1] += xw[1];
1162           x[i2 + 2] += xw[2];
1163           x[i2 + 3] += xw[3];
1164           x[i2 + 4] += xw[4];
1165           idiag -= 25;
1166           i2 -= 5;
1167         }
1168         break;
1169       case 6:
1170         for (i = m - 1; i >= 0; i--) {
1171           v    = aa + 36 * ai[i];
1172           vi   = aj + ai[i];
1173           nz   = ai[i + 1] - ai[i];
1174           s[0] = b[i2];
1175           s[1] = b[i2 + 1];
1176           s[2] = b[i2 + 2];
1177           s[3] = b[i2 + 3];
1178           s[4] = b[i2 + 4];
1179           s[5] = b[i2 + 5];
1180           while (nz--) {
1181             idx   = 6 * (*vi++);
1182             xw[0] = x[idx];
1183             xw[1] = x[1 + idx];
1184             xw[2] = x[2 + idx];
1185             xw[3] = x[3 + idx];
1186             xw[4] = x[4 + idx];
1187             xw[5] = x[5 + idx];
1188             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1189             v += 36;
1190           }
1191           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
1192           x[i2] += xw[0];
1193           x[i2 + 1] += xw[1];
1194           x[i2 + 2] += xw[2];
1195           x[i2 + 3] += xw[3];
1196           x[i2 + 4] += xw[4];
1197           x[i2 + 5] += xw[5];
1198           idiag -= 36;
1199           i2 -= 6;
1200         }
1201         break;
1202       case 7:
1203         for (i = m - 1; i >= 0; i--) {
1204           v    = aa + 49 * ai[i];
1205           vi   = aj + ai[i];
1206           nz   = ai[i + 1] - ai[i];
1207           s[0] = b[i2];
1208           s[1] = b[i2 + 1];
1209           s[2] = b[i2 + 2];
1210           s[3] = b[i2 + 3];
1211           s[4] = b[i2 + 4];
1212           s[5] = b[i2 + 5];
1213           s[6] = b[i2 + 6];
1214           while (nz--) {
1215             idx   = 7 * (*vi++);
1216             xw[0] = x[idx];
1217             xw[1] = x[1 + idx];
1218             xw[2] = x[2 + idx];
1219             xw[3] = x[3 + idx];
1220             xw[4] = x[4 + idx];
1221             xw[5] = x[5 + idx];
1222             xw[6] = x[6 + idx];
1223             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1224             v += 49;
1225           }
1226           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1227           x[i2] += xw[0];
1228           x[i2 + 1] += xw[1];
1229           x[i2 + 2] += xw[2];
1230           x[i2 + 3] += xw[3];
1231           x[i2 + 4] += xw[4];
1232           x[i2 + 5] += xw[5];
1233           x[i2 + 6] += xw[6];
1234           idiag -= 49;
1235           i2 -= 7;
1236         }
1237         break;
1238       default:
1239         for (i = m - 1; i >= 0; i--) {
1240           v  = aa + bs2 * ai[i];
1241           vi = aj + ai[i];
1242           nz = ai[i + 1] - ai[i];
1243 
1244           PetscCall(PetscArraycpy(w, b + i2, bs));
1245           /* copy all rows of x that are needed into contiguous space */
1246           workt = work;
1247           for (j = 0; j < nz; j++) {
1248             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1249             workt += bs;
1250           }
1251           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1252           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1253 
1254           idiag -= bs2;
1255           i2 -= bs;
1256         }
1257         break;
1258       }
1259       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1260     }
1261   }
1262   PetscCall(VecRestoreArray(xx, &x));
1263   PetscCall(VecRestoreArrayRead(bb, &b));
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 /*
1268     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1269 */
1270 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1271 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1272 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1273 #define matsetvaluesblocked4_ matsetvaluesblocked4
1274 #endif
1275 
1276 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) {
1277   Mat                A = *AA;
1278   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1279   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1280   PetscInt          *ai = a->i, *ailen = a->ilen;
1281   PetscInt          *aj = a->j, stepval, lastcol = -1;
1282   const PetscScalar *value = v;
1283   MatScalar         *ap, *aa = a->a, *bap;
1284 
1285   PetscFunctionBegin;
1286   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1287   stepval = (n - 1) * 4;
1288   for (k = 0; k < m; k++) { /* loop over added rows */
1289     row  = im[k];
1290     rp   = aj + ai[row];
1291     ap   = aa + 16 * ai[row];
1292     nrow = ailen[row];
1293     low  = 0;
1294     high = nrow;
1295     for (l = 0; l < n; l++) { /* loop over added columns */
1296       col = in[l];
1297       if (col <= lastcol) low = 0;
1298       else high = nrow;
1299       lastcol = col;
1300       value   = v + k * (stepval + 4 + l) * 4;
1301       while (high - low > 7) {
1302         t = (low + high) / 2;
1303         if (rp[t] > col) high = t;
1304         else low = t;
1305       }
1306       for (i = low; i < high; i++) {
1307         if (rp[i] > col) break;
1308         if (rp[i] == col) {
1309           bap = ap + 16 * i;
1310           for (ii = 0; ii < 4; ii++, value += stepval) {
1311             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1312           }
1313           goto noinsert2;
1314         }
1315       }
1316       N = nrow++ - 1;
1317       high++; /* added new column index thus must search to one higher than before */
1318       /* shift up all the later entries in this row */
1319       for (ii = N; ii >= i; ii--) {
1320         rp[ii + 1] = rp[ii];
1321         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1322       }
1323       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1324       rp[i] = col;
1325       bap   = ap + 16 * i;
1326       for (ii = 0; ii < 4; ii++, value += stepval) {
1327         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1328       }
1329     noinsert2:;
1330       low = i;
1331     }
1332     ailen[row] = nrow;
1333   }
1334   PetscFunctionReturnVoid();
1335 }
1336 
1337 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1338 #define matsetvalues4_ MATSETVALUES4
1339 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1340 #define matsetvalues4_ matsetvalues4
1341 #endif
1342 
1343 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) {
1344   Mat          A = *AA;
1345   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1346   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1347   PetscInt    *ai = a->i, *ailen = a->ilen;
1348   PetscInt    *aj = a->j, brow, bcol;
1349   PetscInt     ridx, cidx, lastcol = -1;
1350   MatScalar   *ap, value, *aa      = a->a, *bap;
1351 
1352   PetscFunctionBegin;
1353   for (k = 0; k < m; k++) { /* loop over added rows */
1354     row  = im[k];
1355     brow = row / 4;
1356     rp   = aj + ai[brow];
1357     ap   = aa + 16 * ai[brow];
1358     nrow = ailen[brow];
1359     low  = 0;
1360     high = nrow;
1361     for (l = 0; l < n; l++) { /* loop over added columns */
1362       col   = in[l];
1363       bcol  = col / 4;
1364       ridx  = row % 4;
1365       cidx  = col % 4;
1366       value = v[l + k * n];
1367       if (col <= lastcol) low = 0;
1368       else high = nrow;
1369       lastcol = col;
1370       while (high - low > 7) {
1371         t = (low + high) / 2;
1372         if (rp[t] > bcol) high = t;
1373         else low = t;
1374       }
1375       for (i = low; i < high; i++) {
1376         if (rp[i] > bcol) break;
1377         if (rp[i] == bcol) {
1378           bap = ap + 16 * i + 4 * cidx + ridx;
1379           *bap += value;
1380           goto noinsert1;
1381         }
1382       }
1383       N = nrow++ - 1;
1384       high++; /* added new column thus must search to one higher than before */
1385       /* shift up all the later entries in this row */
1386       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
1387       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
1388       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1389       rp[i]                        = bcol;
1390       ap[16 * i + 4 * cidx + ridx] = value;
1391     noinsert1:;
1392       low = i;
1393     }
1394     ailen[brow] = nrow;
1395   }
1396   PetscFunctionReturnVoid();
1397 }
1398 
1399 /*
1400      Checks for missing diagonals
1401 */
1402 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) {
1403   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1404   PetscInt    *diag, *ii = a->i, i;
1405 
1406   PetscFunctionBegin;
1407   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
1408   *missing = PETSC_FALSE;
1409   if (A->rmap->n > 0 && !ii) {
1410     *missing = PETSC_TRUE;
1411     if (d) *d = 0;
1412     PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
1413   } else {
1414     PetscInt n;
1415     n    = PetscMin(a->mbs, a->nbs);
1416     diag = a->diag;
1417     for (i = 0; i < n; i++) {
1418       if (diag[i] >= ii[i + 1]) {
1419         *missing = PETSC_TRUE;
1420         if (d) *d = i;
1421         PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i));
1422         break;
1423       }
1424     }
1425   }
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) {
1430   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1431   PetscInt     i, j, m = a->mbs;
1432 
1433   PetscFunctionBegin;
1434   if (!a->diag) {
1435     PetscCall(PetscMalloc1(m, &a->diag));
1436     PetscCall(PetscLogObjectMemory((PetscObject)A, m * sizeof(PetscInt)));
1437     a->free_diag = PETSC_TRUE;
1438   }
1439   for (i = 0; i < m; i++) {
1440     a->diag[i] = a->i[i + 1];
1441     for (j = a->i[i]; j < a->i[i + 1]; j++) {
1442       if (a->j[j] == i) {
1443         a->diag[i] = j;
1444         break;
1445       }
1446     }
1447   }
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) {
1452   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1453   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
1454   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
1455 
1456   PetscFunctionBegin;
1457   *nn = n;
1458   if (!ia) PetscFunctionReturn(0);
1459   if (symmetric) {
1460     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1461     nz = tia[n];
1462   } else {
1463     tia = a->i;
1464     tja = a->j;
1465   }
1466 
1467   if (!blockcompressed && bs > 1) {
1468     (*nn) *= bs;
1469     /* malloc & create the natural set of indices */
1470     PetscCall(PetscMalloc1((n + 1) * bs, ia));
1471     if (n) {
1472       (*ia)[0] = oshift;
1473       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
1474     }
1475 
1476     for (i = 1; i < n; i++) {
1477       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1478       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
1479     }
1480     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1481 
1482     if (inja) {
1483       PetscCall(PetscMalloc1(nz * bs * bs, ja));
1484       cnt = 0;
1485       for (i = 0; i < n; i++) {
1486         for (j = 0; j < bs; j++) {
1487           for (k = tia[i]; k < tia[i + 1]; k++) {
1488             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
1489           }
1490         }
1491       }
1492     }
1493 
1494     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1495       PetscCall(PetscFree(tia));
1496       PetscCall(PetscFree(tja));
1497     }
1498   } else if (oshift == 1) {
1499     if (symmetric) {
1500       nz = tia[A->rmap->n / bs];
1501       /*  add 1 to i and j indices */
1502       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1503       *ia = tia;
1504       if (ja) {
1505         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1506         *ja = tja;
1507       }
1508     } else {
1509       nz = a->i[A->rmap->n / bs];
1510       /* malloc space and  add 1 to i and j indices */
1511       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1512       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1513       if (ja) {
1514         PetscCall(PetscMalloc1(nz, ja));
1515         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1516       }
1517     }
1518   } else {
1519     *ia = tia;
1520     if (ja) *ja = tja;
1521   }
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) {
1526   PetscFunctionBegin;
1527   if (!ia) PetscFunctionReturn(0);
1528   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1529     PetscCall(PetscFree(*ia));
1530     if (ja) PetscCall(PetscFree(*ja));
1531   }
1532   PetscFunctionReturn(0);
1533 }
1534 
1535 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) {
1536   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1537 
1538   PetscFunctionBegin;
1539 #if defined(PETSC_USE_LOG)
1540   PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz);
1541 #endif
1542   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
1543   PetscCall(ISDestroy(&a->row));
1544   PetscCall(ISDestroy(&a->col));
1545   if (a->free_diag) PetscCall(PetscFree(a->diag));
1546   PetscCall(PetscFree(a->idiag));
1547   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
1548   PetscCall(PetscFree(a->solve_work));
1549   PetscCall(PetscFree(a->mult_work));
1550   PetscCall(PetscFree(a->sor_workt));
1551   PetscCall(PetscFree(a->sor_work));
1552   PetscCall(ISDestroy(&a->icol));
1553   PetscCall(PetscFree(a->saved_values));
1554   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1555 
1556   PetscCall(MatDestroy(&a->sbaijMat));
1557   PetscCall(MatDestroy(&a->parent));
1558   PetscCall(PetscFree(A->data));
1559 
1560   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
1561   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
1562   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
1563   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
1564   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
1565   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
1566   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
1567   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
1568   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
1569   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
1570   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
1571   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
1572 #if defined(PETSC_HAVE_HYPRE)
1573   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
1574 #endif
1575   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
1576   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) {
1581   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1582 
1583   PetscFunctionBegin;
1584   switch (op) {
1585   case MAT_ROW_ORIENTED: a->roworiented = flg; break;
1586   case MAT_KEEP_NONZERO_PATTERN: a->keepnonzeropattern = flg; break;
1587   case MAT_NEW_NONZERO_LOCATIONS: a->nonew = (flg ? 0 : 1); break;
1588   case MAT_NEW_NONZERO_LOCATION_ERR: a->nonew = (flg ? -1 : 0); break;
1589   case MAT_NEW_NONZERO_ALLOCATION_ERR: a->nonew = (flg ? -2 : 0); break;
1590   case MAT_UNUSED_NONZERO_LOCATION_ERR: a->nounused = (flg ? -1 : 0); break;
1591   case MAT_FORCE_DIAGONAL_ENTRIES:
1592   case MAT_IGNORE_OFF_PROC_ENTRIES:
1593   case MAT_USE_HASH_TABLE:
1594   case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
1595   case MAT_SPD:
1596   case MAT_SYMMETRIC:
1597   case MAT_STRUCTURALLY_SYMMETRIC:
1598   case MAT_HERMITIAN:
1599   case MAT_SYMMETRY_ETERNAL:
1600   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1601   case MAT_SUBMAT_SINGLEIS:
1602   case MAT_STRUCTURE_ONLY:
1603   case MAT_SPD_ETERNAL:
1604     /* if the diagonal matrix is square it inherits some of the properties above */
1605     break;
1606   default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1607   }
1608   PetscFunctionReturn(0);
1609 }
1610 
1611 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1612 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) {
1613   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
1614   MatScalar   *aa_i;
1615   PetscScalar *v_i;
1616 
1617   PetscFunctionBegin;
1618   bs  = A->rmap->bs;
1619   bs2 = bs * bs;
1620   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1621 
1622   bn  = row / bs; /* Block number */
1623   bp  = row % bs; /* Block Position */
1624   M   = ai[bn + 1] - ai[bn];
1625   *nz = bs * M;
1626 
1627   if (v) {
1628     *v = NULL;
1629     if (*nz) {
1630       PetscCall(PetscMalloc1(*nz, v));
1631       for (i = 0; i < M; i++) { /* for each block in the block row */
1632         v_i  = *v + i * bs;
1633         aa_i = aa + bs2 * (ai[bn] + i);
1634         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
1635       }
1636     }
1637   }
1638 
1639   if (idx) {
1640     *idx = NULL;
1641     if (*nz) {
1642       PetscCall(PetscMalloc1(*nz, idx));
1643       for (i = 0; i < M; i++) { /* for each block in the block row */
1644         idx_i = *idx + i * bs;
1645         itmp  = bs * aj[ai[bn] + i];
1646         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
1647       }
1648     }
1649   }
1650   PetscFunctionReturn(0);
1651 }
1652 
1653 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
1654   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1655 
1656   PetscFunctionBegin;
1657   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
1658   PetscFunctionReturn(0);
1659 }
1660 
1661 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
1662   PetscFunctionBegin;
1663   if (nz) *nz = 0;
1664   if (idx) PetscCall(PetscFree(*idx));
1665   if (v) PetscCall(PetscFree(*v));
1666   PetscFunctionReturn(0);
1667 }
1668 
1669 PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) {
1670   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
1671   Mat          C;
1672   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
1673   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
1674   MatScalar   *ata, *aa = a->a;
1675 
1676   PetscFunctionBegin;
1677   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1678   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1679   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1680     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1681 
1682     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
1683     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
1684     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
1685     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
1686 
1687     at  = (Mat_SeqBAIJ *)C->data;
1688     ati = at->i;
1689     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1690   } else {
1691     C   = *B;
1692     at  = (Mat_SeqBAIJ *)C->data;
1693     ati = at->i;
1694   }
1695 
1696   atj = at->j;
1697   ata = at->a;
1698 
1699   /* Copy ati into atfill so we have locations of the next free space in atj */
1700   PetscCall(PetscArraycpy(atfill, ati, nbs));
1701 
1702   /* Walk through A row-wise and mark nonzero entries of A^T. */
1703   for (i = 0; i < mbs; i++) {
1704     anzj = ai[i + 1] - ai[i];
1705     for (j = 0; j < anzj; j++) {
1706       atj[atfill[*aj]] = i;
1707       for (kr = 0; kr < bs; kr++) {
1708         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
1709       }
1710       atfill[*aj++] += 1;
1711     }
1712   }
1713   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1714   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1715 
1716   /* Clean up temporary space and complete requests. */
1717   PetscCall(PetscFree(atfill));
1718 
1719   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1720     PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1721     *B = C;
1722   } else {
1723     PetscCall(MatHeaderMerge(A, &C));
1724   }
1725   PetscFunctionReturn(0);
1726 }
1727 
1728 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) {
1729   Mat Btrans;
1730 
1731   PetscFunctionBegin;
1732   *f = PETSC_FALSE;
1733   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
1734   PetscCall(MatEqual_SeqBAIJ(B, Btrans, f));
1735   PetscCall(MatDestroy(&Btrans));
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1740 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) {
1741   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1742   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1743   PetscInt    *rowlens, *colidxs;
1744   PetscScalar *matvals;
1745 
1746   PetscFunctionBegin;
1747   PetscCall(PetscViewerSetUp(viewer));
1748 
1749   M  = mat->rmap->N;
1750   N  = mat->cmap->N;
1751   m  = mat->rmap->n;
1752   bs = mat->rmap->bs;
1753   nz = bs * bs * A->nz;
1754 
1755   /* write matrix header */
1756   header[0] = MAT_FILE_CLASSID;
1757   header[1] = M;
1758   header[2] = N;
1759   header[3] = nz;
1760   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1761 
1762   /* store row lengths */
1763   PetscCall(PetscMalloc1(m, &rowlens));
1764   for (cnt = 0, i = 0; i < A->mbs; i++)
1765     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
1766   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
1767   PetscCall(PetscFree(rowlens));
1768 
1769   /* store column indices  */
1770   PetscCall(PetscMalloc1(nz, &colidxs));
1771   for (cnt = 0, i = 0; i < A->mbs; i++)
1772     for (k = 0; k < bs; k++)
1773       for (j = A->i[i]; j < A->i[i + 1]; j++)
1774         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
1775   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1776   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
1777   PetscCall(PetscFree(colidxs));
1778 
1779   /* store nonzero values */
1780   PetscCall(PetscMalloc1(nz, &matvals));
1781   for (cnt = 0, i = 0; i < A->mbs; i++)
1782     for (k = 0; k < bs; k++)
1783       for (j = A->i[i]; j < A->i[i + 1]; j++)
1784         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
1785   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1786   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
1787   PetscCall(PetscFree(matvals));
1788 
1789   /* write block size option to the viewer's .info file */
1790   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) {
1795   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1796   PetscInt     i, bs = A->rmap->bs, k;
1797 
1798   PetscFunctionBegin;
1799   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1800   for (i = 0; i < a->mbs; i++) {
1801     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
1802     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
1803     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1804   }
1805   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1806   PetscFunctionReturn(0);
1807 }
1808 
1809 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) {
1810   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1811   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1812   PetscViewerFormat format;
1813 
1814   PetscFunctionBegin;
1815   if (A->structure_only) {
1816     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
1817     PetscFunctionReturn(0);
1818   }
1819 
1820   PetscCall(PetscViewerGetFormat(viewer, &format));
1821   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1822     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1823   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1824     const char *matname;
1825     Mat         aij;
1826     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
1827     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
1828     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
1829     PetscCall(MatView(aij, viewer));
1830     PetscCall(MatDestroy(&aij));
1831   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1832     PetscFunctionReturn(0);
1833   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1834     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1835     for (i = 0; i < a->mbs; i++) {
1836       for (j = 0; j < bs; j++) {
1837         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1838         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1839           for (l = 0; l < bs; l++) {
1840 #if defined(PETSC_USE_COMPLEX)
1841             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1842               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1843             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1844               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1845             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1846               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1847             }
1848 #else
1849             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1850 #endif
1851           }
1852         }
1853         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1854       }
1855     }
1856     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1857   } else {
1858     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1859     for (i = 0; i < a->mbs; i++) {
1860       for (j = 0; j < bs; j++) {
1861         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1862         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1863           for (l = 0; l < bs; l++) {
1864 #if defined(PETSC_USE_COMPLEX)
1865             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
1866               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1867             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
1868               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1869             } else {
1870               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1871             }
1872 #else
1873             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1874 #endif
1875           }
1876         }
1877         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1878       }
1879     }
1880     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1881   }
1882   PetscCall(PetscViewerFlush(viewer));
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #include <petscdraw.h>
1887 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) {
1888   Mat               A = (Mat)Aa;
1889   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1890   PetscInt          row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2;
1891   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1892   MatScalar        *aa;
1893   PetscViewer       viewer;
1894   PetscViewerFormat format;
1895 
1896   PetscFunctionBegin;
1897   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1898   PetscCall(PetscViewerGetFormat(viewer, &format));
1899   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
1900 
1901   /* loop over matrix elements drawing boxes */
1902 
1903   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1904     PetscDrawCollectiveBegin(draw);
1905     /* Blue for negative, Cyan for zero and  Red for positive */
1906     color = PETSC_DRAW_BLUE;
1907     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1908       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1909         y_l = A->rmap->N - row - 1.0;
1910         y_r = y_l + 1.0;
1911         x_l = a->j[j] * bs;
1912         x_r = x_l + 1.0;
1913         aa  = a->a + j * bs2;
1914         for (k = 0; k < bs; k++) {
1915           for (l = 0; l < bs; l++) {
1916             if (PetscRealPart(*aa++) >= 0.) continue;
1917             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1918           }
1919         }
1920       }
1921     }
1922     color = PETSC_DRAW_CYAN;
1923     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1924       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1925         y_l = A->rmap->N - row - 1.0;
1926         y_r = y_l + 1.0;
1927         x_l = a->j[j] * bs;
1928         x_r = x_l + 1.0;
1929         aa  = a->a + j * bs2;
1930         for (k = 0; k < bs; k++) {
1931           for (l = 0; l < bs; l++) {
1932             if (PetscRealPart(*aa++) != 0.) continue;
1933             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1934           }
1935         }
1936       }
1937     }
1938     color = PETSC_DRAW_RED;
1939     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1940       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1941         y_l = A->rmap->N - row - 1.0;
1942         y_r = y_l + 1.0;
1943         x_l = a->j[j] * bs;
1944         x_r = x_l + 1.0;
1945         aa  = a->a + j * bs2;
1946         for (k = 0; k < bs; k++) {
1947           for (l = 0; l < bs; l++) {
1948             if (PetscRealPart(*aa++) <= 0.) continue;
1949             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1950           }
1951         }
1952       }
1953     }
1954     PetscDrawCollectiveEnd(draw);
1955   } else {
1956     /* use contour shading to indicate magnitude of values */
1957     /* first determine max of all nonzero values */
1958     PetscReal minv = 0.0, maxv = 0.0;
1959     PetscDraw popup;
1960 
1961     for (i = 0; i < a->nz * a->bs2; i++) {
1962       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1963     }
1964     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1965     PetscCall(PetscDrawGetPopup(draw, &popup));
1966     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
1967 
1968     PetscDrawCollectiveBegin(draw);
1969     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1970       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1971         y_l = A->rmap->N - row - 1.0;
1972         y_r = y_l + 1.0;
1973         x_l = a->j[j] * bs;
1974         x_r = x_l + 1.0;
1975         aa  = a->a + j * bs2;
1976         for (k = 0; k < bs; k++) {
1977           for (l = 0; l < bs; l++) {
1978             MatScalar v = *aa++;
1979             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
1980             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1981           }
1982         }
1983       }
1984     }
1985     PetscDrawCollectiveEnd(draw);
1986   }
1987   PetscFunctionReturn(0);
1988 }
1989 
1990 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) {
1991   PetscReal xl, yl, xr, yr, w, h;
1992   PetscDraw draw;
1993   PetscBool isnull;
1994 
1995   PetscFunctionBegin;
1996   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1997   PetscCall(PetscDrawIsNull(draw, &isnull));
1998   if (isnull) PetscFunctionReturn(0);
1999 
2000   xr = A->cmap->n;
2001   yr = A->rmap->N;
2002   h  = yr / 10.0;
2003   w  = xr / 10.0;
2004   xr += w;
2005   yr += h;
2006   xl = -w;
2007   yl = -h;
2008   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
2009   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
2010   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
2011   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
2012   PetscCall(PetscDrawSave(draw));
2013   PetscFunctionReturn(0);
2014 }
2015 
2016 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) {
2017   PetscBool iascii, isbinary, isdraw;
2018 
2019   PetscFunctionBegin;
2020   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2021   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2022   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
2023   if (iascii) {
2024     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
2025   } else if (isbinary) {
2026     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
2027   } else if (isdraw) {
2028     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
2029   } else {
2030     Mat B;
2031     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2032     PetscCall(MatView(B, viewer));
2033     PetscCall(MatDestroy(&B));
2034   }
2035   PetscFunctionReturn(0);
2036 }
2037 
2038 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) {
2039   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2040   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2041   PetscInt    *ai = a->i, *ailen = a->ilen;
2042   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
2043   MatScalar   *ap, *aa = a->a;
2044 
2045   PetscFunctionBegin;
2046   for (k = 0; k < m; k++) { /* loop over rows */
2047     row  = im[k];
2048     brow = row / bs;
2049     if (row < 0) {
2050       v += n;
2051       continue;
2052     } /* negative row */
2053     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2054     rp   = aj ? aj + ai[brow] : NULL;       /* mustn't add to NULL, that is UB */
2055     ap   = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */
2056     nrow = ailen[brow];
2057     for (l = 0; l < n; l++) { /* loop over columns */
2058       if (in[l] < 0) {
2059         v++;
2060         continue;
2061       } /* negative column */
2062       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
2063       col  = in[l];
2064       bcol = col / bs;
2065       cidx = col % bs;
2066       ridx = row % bs;
2067       high = nrow;
2068       low  = 0; /* assume unsorted */
2069       while (high - low > 5) {
2070         t = (low + high) / 2;
2071         if (rp[t] > bcol) high = t;
2072         else low = t;
2073       }
2074       for (i = low; i < high; i++) {
2075         if (rp[i] > bcol) break;
2076         if (rp[i] == bcol) {
2077           *v++ = ap[bs2 * i + bs * cidx + ridx];
2078           goto finished;
2079         }
2080       }
2081       *v++ = 0.0;
2082     finished:;
2083     }
2084   }
2085   PetscFunctionReturn(0);
2086 }
2087 
2088 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) {
2089   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2090   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2091   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2092   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2093   PetscBool          roworiented = a->roworiented;
2094   const PetscScalar *value       = v;
2095   MatScalar         *ap = NULL, *aa = a->a, *bap;
2096 
2097   PetscFunctionBegin;
2098   if (roworiented) {
2099     stepval = (n - 1) * bs;
2100   } else {
2101     stepval = (m - 1) * bs;
2102   }
2103   for (k = 0; k < m; k++) { /* loop over added rows */
2104     row = im[k];
2105     if (row < 0) continue;
2106     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
2107     rp = aj + ai[row];
2108     if (!A->structure_only) ap = aa + bs2 * ai[row];
2109     rmax = imax[row];
2110     nrow = ailen[row];
2111     low  = 0;
2112     high = nrow;
2113     for (l = 0; l < n; l++) { /* loop over added columns */
2114       if (in[l] < 0) continue;
2115       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
2116       col = in[l];
2117       if (!A->structure_only) {
2118         if (roworiented) {
2119           value = v + (k * (stepval + bs) + l) * bs;
2120         } else {
2121           value = v + (l * (stepval + bs) + k) * bs;
2122         }
2123       }
2124       if (col <= lastcol) low = 0;
2125       else high = nrow;
2126       lastcol = col;
2127       while (high - low > 7) {
2128         t = (low + high) / 2;
2129         if (rp[t] > col) high = t;
2130         else low = t;
2131       }
2132       for (i = low; i < high; i++) {
2133         if (rp[i] > col) break;
2134         if (rp[i] == col) {
2135           if (A->structure_only) goto noinsert2;
2136           bap = ap + bs2 * i;
2137           if (roworiented) {
2138             if (is == ADD_VALUES) {
2139               for (ii = 0; ii < bs; ii++, value += stepval) {
2140                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2141               }
2142             } else {
2143               for (ii = 0; ii < bs; ii++, value += stepval) {
2144                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2145               }
2146             }
2147           } else {
2148             if (is == ADD_VALUES) {
2149               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2150                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
2151                 bap += bs;
2152               }
2153             } else {
2154               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2155                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
2156                 bap += bs;
2157               }
2158             }
2159           }
2160           goto noinsert2;
2161         }
2162       }
2163       if (nonew == 1) goto noinsert2;
2164       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2165       if (A->structure_only) {
2166         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
2167       } else {
2168         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2169       }
2170       N = nrow++ - 1;
2171       high++;
2172       /* shift up all the later entries in this row */
2173       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2174       rp[i] = col;
2175       if (!A->structure_only) {
2176         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2177         bap = ap + bs2 * i;
2178         if (roworiented) {
2179           for (ii = 0; ii < bs; ii++, value += stepval) {
2180             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2181           }
2182         } else {
2183           for (ii = 0; ii < bs; ii++, value += stepval) {
2184             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2185           }
2186         }
2187       }
2188     noinsert2:;
2189       low = i;
2190     }
2191     ailen[row] = nrow;
2192   }
2193   PetscFunctionReturn(0);
2194 }
2195 
2196 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) {
2197   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2198   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2199   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2200   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2201   MatScalar   *aa    = a->a, *ap;
2202   PetscReal    ratio = 0.6;
2203 
2204   PetscFunctionBegin;
2205   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2206 
2207   if (m) rmax = ailen[0];
2208   for (i = 1; i < mbs; i++) {
2209     /* move each row back by the amount of empty slots (fshift) before it*/
2210     fshift += imax[i - 1] - ailen[i - 1];
2211     rmax = PetscMax(rmax, ailen[i]);
2212     if (fshift) {
2213       ip = aj + ai[i];
2214       ap = aa + bs2 * ai[i];
2215       N  = ailen[i];
2216       PetscCall(PetscArraymove(ip - fshift, ip, N));
2217       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2218     }
2219     ai[i] = ai[i - 1] + ailen[i - 1];
2220   }
2221   if (mbs) {
2222     fshift += imax[mbs - 1] - ailen[mbs - 1];
2223     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2224   }
2225 
2226   /* reset ilen and imax for each row */
2227   a->nonzerorowcnt = 0;
2228   if (A->structure_only) {
2229     PetscCall(PetscFree2(a->imax, a->ilen));
2230   } else { /* !A->structure_only */
2231     for (i = 0; i < mbs; i++) {
2232       ailen[i] = imax[i] = ai[i + 1] - ai[i];
2233       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2234     }
2235   }
2236   a->nz = ai[mbs];
2237 
2238   /* diagonals may have moved, so kill the diagonal pointers */
2239   a->idiagvalid = PETSC_FALSE;
2240   if (fshift && a->diag) {
2241     PetscCall(PetscFree(a->diag));
2242     PetscCall(PetscLogObjectMemory((PetscObject)A, -(mbs + 1) * sizeof(PetscInt)));
2243     a->diag = NULL;
2244   }
2245   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
2246   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
2247   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
2248   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
2249 
2250   A->info.mallocs += a->reallocs;
2251   a->reallocs         = 0;
2252   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2253   a->rmax             = rmax;
2254 
2255   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 /*
2260    This function returns an array of flags which indicate the locations of contiguous
2261    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2262    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2263    Assume: sizes should be long enough to hold all the values.
2264 */
2265 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) {
2266   PetscInt  i, j, k, row;
2267   PetscBool flg;
2268 
2269   PetscFunctionBegin;
2270   for (i = 0, j = 0; i < n; j++) {
2271     row = idx[i];
2272     if (row % bs != 0) { /* Not the beginning of a block */
2273       sizes[j] = 1;
2274       i++;
2275     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2276       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2277       i++;
2278     } else { /* Beginning of the block, so check if the complete block exists */
2279       flg = PETSC_TRUE;
2280       for (k = 1; k < bs; k++) {
2281         if (row + k != idx[i + k]) { /* break in the block */
2282           flg = PETSC_FALSE;
2283           break;
2284         }
2285       }
2286       if (flg) { /* No break in the bs */
2287         sizes[j] = bs;
2288         i += bs;
2289       } else {
2290         sizes[j] = 1;
2291         i++;
2292       }
2293     }
2294   }
2295   *bs_max = j;
2296   PetscFunctionReturn(0);
2297 }
2298 
2299 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) {
2300   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2301   PetscInt           i, j, k, count, *rows;
2302   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
2303   PetscScalar        zero = 0.0;
2304   MatScalar         *aa;
2305   const PetscScalar *xx;
2306   PetscScalar       *bb;
2307 
2308   PetscFunctionBegin;
2309   /* fix right hand side if needed */
2310   if (x && b) {
2311     PetscCall(VecGetArrayRead(x, &xx));
2312     PetscCall(VecGetArray(b, &bb));
2313     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
2314     PetscCall(VecRestoreArrayRead(x, &xx));
2315     PetscCall(VecRestoreArray(b, &bb));
2316   }
2317 
2318   /* Make a copy of the IS and  sort it */
2319   /* allocate memory for rows,sizes */
2320   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2321 
2322   /* copy IS values to rows, and sort them */
2323   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
2324   PetscCall(PetscSortInt(is_n, rows));
2325 
2326   if (baij->keepnonzeropattern) {
2327     for (i = 0; i < is_n; i++) sizes[i] = 1;
2328     bs_max = is_n;
2329   } else {
2330     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2331     A->nonzerostate++;
2332   }
2333 
2334   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2335     row = rows[j];
2336     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2337     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2338     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
2339     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2340       if (diag != (PetscScalar)0.0) {
2341         if (baij->ilen[row / bs] > 0) {
2342           baij->ilen[row / bs]       = 1;
2343           baij->j[baij->i[row / bs]] = row / bs;
2344 
2345           PetscCall(PetscArrayzero(aa, count * bs));
2346         }
2347         /* Now insert all the diagonal values for this bs */
2348         for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES));
2349       } else { /* (diag == 0.0) */
2350         baij->ilen[row / bs] = 0;
2351       }      /* end (diag == 0.0) */
2352     } else { /* (sizes[i] != bs) */
2353       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2354       for (k = 0; k < count; k++) {
2355         aa[0] = zero;
2356         aa += bs;
2357       }
2358       if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES));
2359     }
2360   }
2361 
2362   PetscCall(PetscFree2(rows, sizes));
2363   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2364   PetscFunctionReturn(0);
2365 }
2366 
2367 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) {
2368   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2369   PetscInt           i, j, k, count;
2370   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
2371   PetscScalar        zero = 0.0;
2372   MatScalar         *aa;
2373   const PetscScalar *xx;
2374   PetscScalar       *bb;
2375   PetscBool         *zeroed, vecs = PETSC_FALSE;
2376 
2377   PetscFunctionBegin;
2378   /* fix right hand side if needed */
2379   if (x && b) {
2380     PetscCall(VecGetArrayRead(x, &xx));
2381     PetscCall(VecGetArray(b, &bb));
2382     vecs = PETSC_TRUE;
2383   }
2384 
2385   /* zero the columns */
2386   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
2387   for (i = 0; i < is_n; i++) {
2388     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
2389     zeroed[is_idx[i]] = PETSC_TRUE;
2390   }
2391   for (i = 0; i < A->rmap->N; i++) {
2392     if (!zeroed[i]) {
2393       row = i / bs;
2394       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
2395         for (k = 0; k < bs; k++) {
2396           col = bs * baij->j[j] + k;
2397           if (zeroed[col]) {
2398             aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
2399             if (vecs) bb[i] -= aa[0] * xx[col];
2400             aa[0] = 0.0;
2401           }
2402         }
2403       }
2404     } else if (vecs) bb[i] = diag * xx[i];
2405   }
2406   PetscCall(PetscFree(zeroed));
2407   if (vecs) {
2408     PetscCall(VecRestoreArrayRead(x, &xx));
2409     PetscCall(VecRestoreArray(b, &bb));
2410   }
2411 
2412   /* zero the rows */
2413   for (i = 0; i < is_n; i++) {
2414     row   = is_idx[i];
2415     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2416     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
2417     for (k = 0; k < count; k++) {
2418       aa[0] = zero;
2419       aa += bs;
2420     }
2421     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
2422   }
2423   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2424   PetscFunctionReturn(0);
2425 }
2426 
2427 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) {
2428   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2429   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2430   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2431   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2432   PetscInt     ridx, cidx, bs2                 = a->bs2;
2433   PetscBool    roworiented = a->roworiented;
2434   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
2435 
2436   PetscFunctionBegin;
2437   for (k = 0; k < m; k++) { /* loop over added rows */
2438     row  = im[k];
2439     brow = row / bs;
2440     if (row < 0) continue;
2441     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
2442     rp = aj + ai[brow];
2443     if (!A->structure_only) ap = aa + bs2 * ai[brow];
2444     rmax = imax[brow];
2445     nrow = ailen[brow];
2446     low  = 0;
2447     high = nrow;
2448     for (l = 0; l < n; l++) { /* loop over added columns */
2449       if (in[l] < 0) continue;
2450       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
2451       col  = in[l];
2452       bcol = col / bs;
2453       ridx = row % bs;
2454       cidx = col % bs;
2455       if (!A->structure_only) {
2456         if (roworiented) {
2457           value = v[l + k * n];
2458         } else {
2459           value = v[k + l * m];
2460         }
2461       }
2462       if (col <= lastcol) low = 0;
2463       else high = nrow;
2464       lastcol = col;
2465       while (high - low > 7) {
2466         t = (low + high) / 2;
2467         if (rp[t] > bcol) high = t;
2468         else low = t;
2469       }
2470       for (i = low; i < high; i++) {
2471         if (rp[i] > bcol) break;
2472         if (rp[i] == bcol) {
2473           bap = ap + bs2 * i + bs * cidx + ridx;
2474           if (!A->structure_only) {
2475             if (is == ADD_VALUES) *bap += value;
2476             else *bap = value;
2477           }
2478           goto noinsert1;
2479         }
2480       }
2481       if (nonew == 1) goto noinsert1;
2482       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2483       if (A->structure_only) {
2484         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2485       } else {
2486         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2487       }
2488       N = nrow++ - 1;
2489       high++;
2490       /* shift up all the later entries in this row */
2491       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2492       rp[i] = bcol;
2493       if (!A->structure_only) {
2494         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2495         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2496         ap[bs2 * i + bs * cidx + ridx] = value;
2497       }
2498       a->nz++;
2499       A->nonzerostate++;
2500     noinsert1:;
2501       low = i;
2502     }
2503     ailen[brow] = nrow;
2504   }
2505   PetscFunctionReturn(0);
2506 }
2507 
2508 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) {
2509   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
2510   Mat          outA;
2511   PetscBool    row_identity, col_identity;
2512 
2513   PetscFunctionBegin;
2514   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
2515   PetscCall(ISIdentity(row, &row_identity));
2516   PetscCall(ISIdentity(col, &col_identity));
2517   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
2518 
2519   outA            = inA;
2520   inA->factortype = MAT_FACTOR_LU;
2521   PetscCall(PetscFree(inA->solvertype));
2522   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
2523 
2524   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2525 
2526   PetscCall(PetscObjectReference((PetscObject)row));
2527   PetscCall(ISDestroy(&a->row));
2528   a->row = row;
2529   PetscCall(PetscObjectReference((PetscObject)col));
2530   PetscCall(ISDestroy(&a->col));
2531   a->col = col;
2532 
2533   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2534   PetscCall(ISDestroy(&a->icol));
2535   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2536   PetscCall(PetscLogObjectParent((PetscObject)inA, (PetscObject)a->icol));
2537 
2538   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2539   if (!a->solve_work) {
2540     PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
2541     PetscCall(PetscLogObjectMemory((PetscObject)inA, (inA->rmap->N + inA->rmap->bs) * sizeof(PetscScalar)));
2542   }
2543   PetscCall(MatLUFactorNumeric(outA, inA, info));
2544   PetscFunctionReturn(0);
2545 }
2546 
2547 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, PetscInt *indices) {
2548   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2549   PetscInt     i, nz, mbs;
2550 
2551   PetscFunctionBegin;
2552   nz  = baij->maxnz;
2553   mbs = baij->mbs;
2554   for (i = 0; i < nz; i++) baij->j[i] = indices[i];
2555   baij->nz = nz;
2556   for (i = 0; i < mbs; i++) baij->ilen[i] = baij->imax[i];
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 /*@
2561     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix.
2562 
2563   Input Parameters:
2564 +  mat - the `MATSEQBAIJ` matrix
2565 -  indices - the column indices
2566 
2567   Level: advanced
2568 
2569   Notes:
2570     This can be called if you have precomputed the nonzero structure of the
2571   matrix and want to provide it to the matrix object to improve the performance
2572   of the `MatSetValues()` operation.
2573 
2574     You MUST have set the correct numbers of nonzeros per row in the call to
2575   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
2576 
2577     MUST be called before any calls to `MatSetValues()`
2578 
2579 .seealso: `MATSEQBAIJ`, `MatSetValues()`
2580 @*/
2581 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) {
2582   PetscFunctionBegin;
2583   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2584   PetscValidIntPointer(indices, 2);
2585   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices));
2586   PetscFunctionReturn(0);
2587 }
2588 
2589 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) {
2590   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2591   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2592   PetscReal    atmp;
2593   PetscScalar *x, zero = 0.0;
2594   MatScalar   *aa;
2595   PetscInt     ncols, brow, krow, kcol;
2596 
2597   PetscFunctionBegin;
2598   /* why is this not a macro???????????????????????????????????????????????????????????????? */
2599   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2600   bs  = A->rmap->bs;
2601   aa  = a->a;
2602   ai  = a->i;
2603   aj  = a->j;
2604   mbs = a->mbs;
2605 
2606   PetscCall(VecSet(v, zero));
2607   PetscCall(VecGetArray(v, &x));
2608   PetscCall(VecGetLocalSize(v, &n));
2609   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2610   for (i = 0; i < mbs; i++) {
2611     ncols = ai[1] - ai[0];
2612     ai++;
2613     brow = bs * i;
2614     for (j = 0; j < ncols; j++) {
2615       for (kcol = 0; kcol < bs; kcol++) {
2616         for (krow = 0; krow < bs; krow++) {
2617           atmp = PetscAbsScalar(*aa);
2618           aa++;
2619           row = brow + krow; /* row index */
2620           if (PetscAbsScalar(x[row]) < atmp) {
2621             x[row] = atmp;
2622             if (idx) idx[row] = bs * (*aj) + kcol;
2623           }
2624         }
2625       }
2626       aj++;
2627     }
2628   }
2629   PetscCall(VecRestoreArray(v, &x));
2630   PetscFunctionReturn(0);
2631 }
2632 
2633 PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) {
2634   PetscFunctionBegin;
2635   /* If the two matrices have the same copy implementation, use fast copy. */
2636   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2637     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
2638     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2639     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
2640 
2641     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
2642     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
2643     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
2644     PetscCall(PetscObjectStateIncrease((PetscObject)B));
2645   } else {
2646     PetscCall(MatCopy_Basic(A, B, str));
2647   }
2648   PetscFunctionReturn(0);
2649 }
2650 
2651 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) {
2652   PetscFunctionBegin;
2653   PetscCall(MatSeqBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL));
2654   PetscFunctionReturn(0);
2655 }
2656 
2657 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) {
2658   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2659 
2660   PetscFunctionBegin;
2661   *array = a->a;
2662   PetscFunctionReturn(0);
2663 }
2664 
2665 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) {
2666   PetscFunctionBegin;
2667   *array = NULL;
2668   PetscFunctionReturn(0);
2669 }
2670 
2671 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) {
2672   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
2673   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
2674   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
2675 
2676   PetscFunctionBegin;
2677   /* Set the number of nonzeros in the new matrix */
2678   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
2679   PetscFunctionReturn(0);
2680 }
2681 
2682 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) {
2683   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
2684   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2685   PetscBLASInt one = 1;
2686 
2687   PetscFunctionBegin;
2688   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2689     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2690     if (e) {
2691       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2692       if (e) {
2693         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2694         if (e) str = SAME_NONZERO_PATTERN;
2695       }
2696     }
2697     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2698   }
2699   if (str == SAME_NONZERO_PATTERN) {
2700     PetscScalar  alpha = a;
2701     PetscBLASInt bnz;
2702     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2703     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
2704     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2705   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2706     PetscCall(MatAXPY_Basic(Y, a, X, str));
2707   } else {
2708     Mat       B;
2709     PetscInt *nnz;
2710     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
2711     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
2712     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2713     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2714     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
2715     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
2716     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
2717     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
2718     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
2719     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2720     PetscCall(MatHeaderMerge(Y, &B));
2721     PetscCall(PetscFree(nnz));
2722   }
2723   PetscFunctionReturn(0);
2724 }
2725 
2726 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) {
2727 #if defined(PETSC_USE_COMPLEX)
2728   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2729   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2730   MatScalar   *aa = a->a;
2731 
2732   PetscFunctionBegin;
2733   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
2734 #else
2735   PetscFunctionBegin;
2736 #endif
2737   PetscFunctionReturn(0);
2738 }
2739 
2740 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) {
2741   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2742   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2743   MatScalar   *aa = a->a;
2744 
2745   PetscFunctionBegin;
2746   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
2747   PetscFunctionReturn(0);
2748 }
2749 
2750 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) {
2751   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2752   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2753   MatScalar   *aa = a->a;
2754 
2755   PetscFunctionBegin;
2756   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2757   PetscFunctionReturn(0);
2758 }
2759 
2760 /*
2761     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2762 */
2763 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) {
2764   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
2765   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
2766   PetscInt     nz = a->i[m], row, *jj, mr, col;
2767 
2768   PetscFunctionBegin;
2769   *nn = n;
2770   if (!ia) PetscFunctionReturn(0);
2771   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
2772   PetscCall(PetscCalloc1(n, &collengths));
2773   PetscCall(PetscMalloc1(n + 1, &cia));
2774   PetscCall(PetscMalloc1(nz, &cja));
2775   jj = a->j;
2776   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2777   cia[0] = oshift;
2778   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2779   PetscCall(PetscArrayzero(collengths, n));
2780   jj = a->j;
2781   for (row = 0; row < m; row++) {
2782     mr = a->i[row + 1] - a->i[row];
2783     for (i = 0; i < mr; i++) {
2784       col = *jj++;
2785 
2786       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2787     }
2788   }
2789   PetscCall(PetscFree(collengths));
2790   *ia = cia;
2791   *ja = cja;
2792   PetscFunctionReturn(0);
2793 }
2794 
2795 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) {
2796   PetscFunctionBegin;
2797   if (!ia) PetscFunctionReturn(0);
2798   PetscCall(PetscFree(*ia));
2799   PetscCall(PetscFree(*ja));
2800   PetscFunctionReturn(0);
2801 }
2802 
2803 /*
2804  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2805  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2806  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2807  */
2808 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) {
2809   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2810   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2811   PetscInt     nz = a->i[m], row, *jj, mr, col;
2812   PetscInt    *cspidx;
2813 
2814   PetscFunctionBegin;
2815   *nn = n;
2816   if (!ia) PetscFunctionReturn(0);
2817 
2818   PetscCall(PetscCalloc1(n, &collengths));
2819   PetscCall(PetscMalloc1(n + 1, &cia));
2820   PetscCall(PetscMalloc1(nz, &cja));
2821   PetscCall(PetscMalloc1(nz, &cspidx));
2822   jj = a->j;
2823   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2824   cia[0] = oshift;
2825   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2826   PetscCall(PetscArrayzero(collengths, n));
2827   jj = a->j;
2828   for (row = 0; row < m; row++) {
2829     mr = a->i[row + 1] - a->i[row];
2830     for (i = 0; i < mr; i++) {
2831       col                                         = *jj++;
2832       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2833       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2834     }
2835   }
2836   PetscCall(PetscFree(collengths));
2837   *ia    = cia;
2838   *ja    = cja;
2839   *spidx = cspidx;
2840   PetscFunctionReturn(0);
2841 }
2842 
2843 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) {
2844   PetscFunctionBegin;
2845   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
2846   PetscCall(PetscFree(*spidx));
2847   PetscFunctionReturn(0);
2848 }
2849 
2850 PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a) {
2851   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
2852 
2853   PetscFunctionBegin;
2854   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
2855   PetscCall(MatShift_Basic(Y, a));
2856   PetscFunctionReturn(0);
2857 }
2858 
2859 /* -------------------------------------------------------------------*/
2860 static struct _MatOps MatOps_Values = {
2861   MatSetValues_SeqBAIJ,
2862   MatGetRow_SeqBAIJ,
2863   MatRestoreRow_SeqBAIJ,
2864   MatMult_SeqBAIJ_N,
2865   /* 4*/ MatMultAdd_SeqBAIJ_N,
2866   MatMultTranspose_SeqBAIJ,
2867   MatMultTransposeAdd_SeqBAIJ,
2868   NULL,
2869   NULL,
2870   NULL,
2871   /* 10*/ NULL,
2872   MatLUFactor_SeqBAIJ,
2873   NULL,
2874   NULL,
2875   MatTranspose_SeqBAIJ,
2876   /* 15*/ MatGetInfo_SeqBAIJ,
2877   MatEqual_SeqBAIJ,
2878   MatGetDiagonal_SeqBAIJ,
2879   MatDiagonalScale_SeqBAIJ,
2880   MatNorm_SeqBAIJ,
2881   /* 20*/ NULL,
2882   MatAssemblyEnd_SeqBAIJ,
2883   MatSetOption_SeqBAIJ,
2884   MatZeroEntries_SeqBAIJ,
2885   /* 24*/ MatZeroRows_SeqBAIJ,
2886   NULL,
2887   NULL,
2888   NULL,
2889   NULL,
2890   /* 29*/ MatSetUp_SeqBAIJ,
2891   NULL,
2892   NULL,
2893   NULL,
2894   NULL,
2895   /* 34*/ MatDuplicate_SeqBAIJ,
2896   NULL,
2897   NULL,
2898   MatILUFactor_SeqBAIJ,
2899   NULL,
2900   /* 39*/ MatAXPY_SeqBAIJ,
2901   MatCreateSubMatrices_SeqBAIJ,
2902   MatIncreaseOverlap_SeqBAIJ,
2903   MatGetValues_SeqBAIJ,
2904   MatCopy_SeqBAIJ,
2905   /* 44*/ NULL,
2906   MatScale_SeqBAIJ,
2907   MatShift_SeqBAIJ,
2908   NULL,
2909   MatZeroRowsColumns_SeqBAIJ,
2910   /* 49*/ NULL,
2911   MatGetRowIJ_SeqBAIJ,
2912   MatRestoreRowIJ_SeqBAIJ,
2913   MatGetColumnIJ_SeqBAIJ,
2914   MatRestoreColumnIJ_SeqBAIJ,
2915   /* 54*/ MatFDColoringCreate_SeqXAIJ,
2916   NULL,
2917   NULL,
2918   NULL,
2919   MatSetValuesBlocked_SeqBAIJ,
2920   /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2921   MatDestroy_SeqBAIJ,
2922   MatView_SeqBAIJ,
2923   NULL,
2924   NULL,
2925   /* 64*/ NULL,
2926   NULL,
2927   NULL,
2928   NULL,
2929   NULL,
2930   /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2931   NULL,
2932   MatConvert_Basic,
2933   NULL,
2934   NULL,
2935   /* 74*/ NULL,
2936   MatFDColoringApply_BAIJ,
2937   NULL,
2938   NULL,
2939   NULL,
2940   /* 79*/ NULL,
2941   NULL,
2942   NULL,
2943   NULL,
2944   MatLoad_SeqBAIJ,
2945   /* 84*/ NULL,
2946   NULL,
2947   NULL,
2948   NULL,
2949   NULL,
2950   /* 89*/ NULL,
2951   NULL,
2952   NULL,
2953   NULL,
2954   NULL,
2955   /* 94*/ NULL,
2956   NULL,
2957   NULL,
2958   NULL,
2959   NULL,
2960   /* 99*/ NULL,
2961   NULL,
2962   NULL,
2963   MatConjugate_SeqBAIJ,
2964   NULL,
2965   /*104*/ NULL,
2966   MatRealPart_SeqBAIJ,
2967   MatImaginaryPart_SeqBAIJ,
2968   NULL,
2969   NULL,
2970   /*109*/ NULL,
2971   NULL,
2972   NULL,
2973   NULL,
2974   MatMissingDiagonal_SeqBAIJ,
2975   /*114*/ NULL,
2976   NULL,
2977   NULL,
2978   NULL,
2979   NULL,
2980   /*119*/ NULL,
2981   NULL,
2982   MatMultHermitianTranspose_SeqBAIJ,
2983   MatMultHermitianTransposeAdd_SeqBAIJ,
2984   NULL,
2985   /*124*/ NULL,
2986   MatGetColumnReductions_SeqBAIJ,
2987   MatInvertBlockDiagonal_SeqBAIJ,
2988   NULL,
2989   NULL,
2990   /*129*/ NULL,
2991   NULL,
2992   NULL,
2993   NULL,
2994   NULL,
2995   /*134*/ NULL,
2996   NULL,
2997   NULL,
2998   NULL,
2999   NULL,
3000   /*139*/ MatSetBlockSizes_Default,
3001   NULL,
3002   NULL,
3003   MatFDColoringSetUp_SeqXAIJ,
3004   NULL,
3005   /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3006   MatDestroySubMatrices_SeqBAIJ,
3007   NULL,
3008   NULL,
3009   NULL,
3010   NULL,
3011   /*150*/ NULL,
3012 };
3013 
3014 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) {
3015   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3016   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3017 
3018   PetscFunctionBegin;
3019   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3020 
3021   /* allocate space for values if not already there */
3022   if (!aij->saved_values) {
3023     PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
3024     PetscCall(PetscLogObjectMemory((PetscObject)mat, (nz + 1) * sizeof(PetscScalar)));
3025   }
3026 
3027   /* copy values over */
3028   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
3029   PetscFunctionReturn(0);
3030 }
3031 
3032 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) {
3033   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3034   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3035 
3036   PetscFunctionBegin;
3037   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3038   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
3039 
3040   /* copy values over */
3041   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
3042   PetscFunctionReturn(0);
3043 }
3044 
3045 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3046 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3047 
3048 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz) {
3049   Mat_SeqBAIJ *b;
3050   PetscInt     i, mbs, nbs, bs2;
3051   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3052 
3053   PetscFunctionBegin;
3054   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3055   if (nz == MAT_SKIP_ALLOCATION) {
3056     skipallocation = PETSC_TRUE;
3057     nz             = 0;
3058   }
3059 
3060   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
3061   PetscCall(PetscLayoutSetUp(B->rmap));
3062   PetscCall(PetscLayoutSetUp(B->cmap));
3063   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3064 
3065   B->preallocated = PETSC_TRUE;
3066 
3067   mbs = B->rmap->n / bs;
3068   nbs = B->cmap->n / bs;
3069   bs2 = bs * bs;
3070 
3071   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3072 
3073   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3074   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3075   if (nnz) {
3076     for (i = 0; i < mbs; i++) {
3077       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
3078       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3079     }
3080   }
3081 
3082   b = (Mat_SeqBAIJ *)B->data;
3083   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
3084   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3085   PetscOptionsEnd();
3086 
3087   if (!flg) {
3088     switch (bs) {
3089     case 1:
3090       B->ops->mult    = MatMult_SeqBAIJ_1;
3091       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3092       break;
3093     case 2:
3094       B->ops->mult    = MatMult_SeqBAIJ_2;
3095       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3096       break;
3097     case 3:
3098       B->ops->mult    = MatMult_SeqBAIJ_3;
3099       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3100       break;
3101     case 4:
3102       B->ops->mult    = MatMult_SeqBAIJ_4;
3103       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3104       break;
3105     case 5:
3106       B->ops->mult    = MatMult_SeqBAIJ_5;
3107       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3108       break;
3109     case 6:
3110       B->ops->mult    = MatMult_SeqBAIJ_6;
3111       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3112       break;
3113     case 7:
3114       B->ops->mult    = MatMult_SeqBAIJ_7;
3115       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3116       break;
3117     case 9: {
3118       PetscInt version = 1;
3119       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3120       switch (version) {
3121 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3122       case 1:
3123         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
3124         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3125         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3126         break;
3127 #endif
3128       default:
3129         B->ops->mult    = MatMult_SeqBAIJ_N;
3130         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3131         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3132         break;
3133       }
3134       break;
3135     }
3136     case 11:
3137       B->ops->mult    = MatMult_SeqBAIJ_11;
3138       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3139       break;
3140     case 12: {
3141       PetscInt version = 1;
3142       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3143       switch (version) {
3144       case 1:
3145         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
3146         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3147         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3148         break;
3149       case 2:
3150         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
3151         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3152         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3153         break;
3154 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3155       case 3:
3156         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
3157         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3158         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3159         break;
3160 #endif
3161       default:
3162         B->ops->mult    = MatMult_SeqBAIJ_N;
3163         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3164         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3165         break;
3166       }
3167       break;
3168     }
3169     case 15: {
3170       PetscInt version = 1;
3171       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3172       switch (version) {
3173       case 1:
3174         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3175         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3176         break;
3177       case 2:
3178         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3179         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3180         break;
3181       case 3:
3182         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3183         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3184         break;
3185       case 4:
3186         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3187         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3188         break;
3189       default:
3190         B->ops->mult = MatMult_SeqBAIJ_N;
3191         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3192         break;
3193       }
3194       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3195       break;
3196     }
3197     default:
3198       B->ops->mult    = MatMult_SeqBAIJ_N;
3199       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3200       PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3201       break;
3202     }
3203   }
3204   B->ops->sor = MatSOR_SeqBAIJ;
3205   b->mbs      = mbs;
3206   b->nbs      = nbs;
3207   if (!skipallocation) {
3208     if (!b->imax) {
3209       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
3210       PetscCall(PetscLogObjectMemory((PetscObject)B, 2 * mbs * sizeof(PetscInt)));
3211 
3212       b->free_imax_ilen = PETSC_TRUE;
3213     }
3214     /* b->ilen will count nonzeros in each block row so far. */
3215     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3216     if (!nnz) {
3217       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3218       else if (nz < 0) nz = 1;
3219       nz = PetscMin(nz, nbs);
3220       for (i = 0; i < mbs; i++) b->imax[i] = nz;
3221       PetscCall(PetscIntMultError(nz, mbs, &nz));
3222     } else {
3223       PetscInt64 nz64 = 0;
3224       for (i = 0; i < mbs; i++) {
3225         b->imax[i] = nnz[i];
3226         nz64 += nnz[i];
3227       }
3228       PetscCall(PetscIntCast(nz64, &nz));
3229     }
3230 
3231     /* allocate the matrix space */
3232     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3233     if (B->structure_only) {
3234       PetscCall(PetscMalloc1(nz, &b->j));
3235       PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i));
3236       PetscCall(PetscLogObjectMemory((PetscObject)B, (B->rmap->N + 1) * sizeof(PetscInt) + nz * sizeof(PetscInt)));
3237     } else {
3238       PetscInt nzbs2 = 0;
3239       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
3240       PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i));
3241       PetscCall(PetscLogObjectMemory((PetscObject)B, (B->rmap->N + 1) * sizeof(PetscInt) + nz * (bs2 * sizeof(PetscScalar) + sizeof(PetscInt))));
3242       PetscCall(PetscArrayzero(b->a, nz * bs2));
3243     }
3244     PetscCall(PetscArrayzero(b->j, nz));
3245 
3246     if (B->structure_only) {
3247       b->singlemalloc = PETSC_FALSE;
3248       b->free_a       = PETSC_FALSE;
3249     } else {
3250       b->singlemalloc = PETSC_TRUE;
3251       b->free_a       = PETSC_TRUE;
3252     }
3253     b->free_ij = PETSC_TRUE;
3254 
3255     b->i[0] = 0;
3256     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3257 
3258   } else {
3259     b->free_a  = PETSC_FALSE;
3260     b->free_ij = PETSC_FALSE;
3261   }
3262 
3263   b->bs2              = bs2;
3264   b->mbs              = mbs;
3265   b->nz               = 0;
3266   b->maxnz            = nz;
3267   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3268   B->was_assembled    = PETSC_FALSE;
3269   B->assembled        = PETSC_FALSE;
3270   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3271   PetscFunctionReturn(0);
3272 }
3273 
3274 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) {
3275   PetscInt     i, m, nz, nz_max = 0, *nnz;
3276   PetscScalar *values      = NULL;
3277   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3278 
3279   PetscFunctionBegin;
3280   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
3281   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
3282   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
3283   PetscCall(PetscLayoutSetUp(B->rmap));
3284   PetscCall(PetscLayoutSetUp(B->cmap));
3285   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3286   m = B->rmap->n / bs;
3287 
3288   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
3289   PetscCall(PetscMalloc1(m + 1, &nnz));
3290   for (i = 0; i < m; i++) {
3291     nz = ii[i + 1] - ii[i];
3292     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3293     nz_max = PetscMax(nz_max, nz);
3294     nnz[i] = nz;
3295   }
3296   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
3297   PetscCall(PetscFree(nnz));
3298 
3299   values = (PetscScalar *)V;
3300   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3301   for (i = 0; i < m; i++) {
3302     PetscInt        ncols = ii[i + 1] - ii[i];
3303     const PetscInt *icols = jj + ii[i];
3304     if (bs == 1 || !roworiented) {
3305       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
3306       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
3307     } else {
3308       PetscInt j;
3309       for (j = 0; j < ncols; j++) {
3310         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
3311         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
3312       }
3313     }
3314   }
3315   if (!V) PetscCall(PetscFree(values));
3316   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3317   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3318   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3319   PetscFunctionReturn(0);
3320 }
3321 
3322 /*@C
3323    MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3324 
3325    Not Collective
3326 
3327    Input Parameter:
3328 .  mat - a `MATSEQBAIJ` matrix
3329 
3330    Output Parameter:
3331 .   array - pointer to the data
3332 
3333    Level: intermediate
3334 
3335 .seealso: `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3336 @*/
3337 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array) {
3338   PetscFunctionBegin;
3339   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
3340   PetscFunctionReturn(0);
3341 }
3342 
3343 /*@C
3344    MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3345 
3346    Not Collective
3347 
3348    Input Parameters:
3349 +  mat - a `MATSEQBAIJ` matrix
3350 -  array - pointer to the data
3351 
3352    Level: intermediate
3353 
3354 .seealso: `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3355 @*/
3356 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array) {
3357   PetscFunctionBegin;
3358   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
3359   PetscFunctionReturn(0);
3360 }
3361 
3362 /*MC
3363    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3364    block sparse compressed row format.
3365 
3366    Options Database Keys:
3367 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3368 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3369 
3370    Level: beginner
3371 
3372    Notes:
3373     `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
3374     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
3375 
3376    Run with -info to see what version of the matrix-vector product is being used
3377 
3378 .seealso: `MatCreateSeqBAIJ()`
3379 M*/
3380 
3381 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3382 
3383 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) {
3384   PetscMPIInt  size;
3385   Mat_SeqBAIJ *b;
3386 
3387   PetscFunctionBegin;
3388   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
3389   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3390 
3391   PetscCall(PetscNewLog(B, &b));
3392   B->data = (void *)b;
3393   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
3394 
3395   b->row          = NULL;
3396   b->col          = NULL;
3397   b->icol         = NULL;
3398   b->reallocs     = 0;
3399   b->saved_values = NULL;
3400 
3401   b->roworiented        = PETSC_TRUE;
3402   b->nonew              = 0;
3403   b->diag               = NULL;
3404   B->spptr              = NULL;
3405   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3406   b->keepnonzeropattern = PETSC_FALSE;
3407 
3408   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
3409   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
3410   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
3411   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
3412   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
3413   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
3414   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
3415   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
3416   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3417   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
3418 #if defined(PETSC_HAVE_HYPRE)
3419   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
3420 #endif
3421   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
3422   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
3423   PetscFunctionReturn(0);
3424 }
3425 
3426 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) {
3427   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3428   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3429 
3430   PetscFunctionBegin;
3431   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
3432 
3433   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3434     c->imax           = a->imax;
3435     c->ilen           = a->ilen;
3436     c->free_imax_ilen = PETSC_FALSE;
3437   } else {
3438     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3439     PetscCall(PetscLogObjectMemory((PetscObject)C, 2 * mbs * sizeof(PetscInt)));
3440     for (i = 0; i < mbs; i++) {
3441       c->imax[i] = a->imax[i];
3442       c->ilen[i] = a->ilen[i];
3443     }
3444     c->free_imax_ilen = PETSC_TRUE;
3445   }
3446 
3447   /* allocate the matrix space */
3448   if (mallocmatspace) {
3449     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3450       PetscCall(PetscCalloc1(bs2 * nz, &c->a));
3451       PetscCall(PetscLogObjectMemory((PetscObject)C, a->i[mbs] * bs2 * sizeof(PetscScalar)));
3452 
3453       c->i            = a->i;
3454       c->j            = a->j;
3455       c->singlemalloc = PETSC_FALSE;
3456       c->free_a       = PETSC_TRUE;
3457       c->free_ij      = PETSC_FALSE;
3458       c->parent       = A;
3459       C->preallocated = PETSC_TRUE;
3460       C->assembled    = PETSC_TRUE;
3461 
3462       PetscCall(PetscObjectReference((PetscObject)A));
3463       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3464       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3465     } else {
3466       PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i));
3467       PetscCall(PetscLogObjectMemory((PetscObject)C, a->i[mbs] * (bs2 * sizeof(PetscScalar) + sizeof(PetscInt)) + (mbs + 1) * sizeof(PetscInt)));
3468 
3469       c->singlemalloc = PETSC_TRUE;
3470       c->free_a       = PETSC_TRUE;
3471       c->free_ij      = PETSC_TRUE;
3472 
3473       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3474       if (mbs > 0) {
3475         PetscCall(PetscArraycpy(c->j, a->j, nz));
3476         if (cpvalues == MAT_COPY_VALUES) {
3477           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
3478         } else {
3479           PetscCall(PetscArrayzero(c->a, bs2 * nz));
3480         }
3481       }
3482       C->preallocated = PETSC_TRUE;
3483       C->assembled    = PETSC_TRUE;
3484     }
3485   }
3486 
3487   c->roworiented = a->roworiented;
3488   c->nonew       = a->nonew;
3489 
3490   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
3491   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
3492 
3493   c->bs2 = a->bs2;
3494   c->mbs = a->mbs;
3495   c->nbs = a->nbs;
3496 
3497   if (a->diag) {
3498     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3499       c->diag      = a->diag;
3500       c->free_diag = PETSC_FALSE;
3501     } else {
3502       PetscCall(PetscMalloc1(mbs + 1, &c->diag));
3503       PetscCall(PetscLogObjectMemory((PetscObject)C, (mbs + 1) * sizeof(PetscInt)));
3504       for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i];
3505       c->free_diag = PETSC_TRUE;
3506     }
3507   } else c->diag = NULL;
3508 
3509   c->nz         = a->nz;
3510   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3511   c->solve_work = NULL;
3512   c->mult_work  = NULL;
3513   c->sor_workt  = NULL;
3514   c->sor_work   = NULL;
3515 
3516   c->compressedrow.use   = a->compressedrow.use;
3517   c->compressedrow.nrows = a->compressedrow.nrows;
3518   if (a->compressedrow.use) {
3519     i = a->compressedrow.nrows;
3520     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
3521     PetscCall(PetscLogObjectMemory((PetscObject)C, (2 * i + 1) * sizeof(PetscInt)));
3522     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
3523     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
3524   } else {
3525     c->compressedrow.use    = PETSC_FALSE;
3526     c->compressedrow.i      = NULL;
3527     c->compressedrow.rindex = NULL;
3528   }
3529   C->nonzerostate = A->nonzerostate;
3530 
3531   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
3532   PetscFunctionReturn(0);
3533 }
3534 
3535 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) {
3536   PetscFunctionBegin;
3537   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
3538   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
3539   PetscCall(MatSetType(*B, MATSEQBAIJ));
3540   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
3541   PetscFunctionReturn(0);
3542 }
3543 
3544 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3545 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) {
3546   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3547   PetscInt    *rowidxs, *colidxs;
3548   PetscScalar *matvals;
3549 
3550   PetscFunctionBegin;
3551   PetscCall(PetscViewerSetUp(viewer));
3552 
3553   /* read matrix header */
3554   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3555   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3556   M  = header[1];
3557   N  = header[2];
3558   nz = header[3];
3559   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3560   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3561   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3562 
3563   /* set block sizes from the viewer's .info file */
3564   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3565   /* set local and global sizes if not set already */
3566   if (mat->rmap->n < 0) mat->rmap->n = M;
3567   if (mat->cmap->n < 0) mat->cmap->n = N;
3568   if (mat->rmap->N < 0) mat->rmap->N = M;
3569   if (mat->cmap->N < 0) mat->cmap->N = N;
3570   PetscCall(PetscLayoutSetUp(mat->rmap));
3571   PetscCall(PetscLayoutSetUp(mat->cmap));
3572 
3573   /* check if the matrix sizes are correct */
3574   PetscCall(MatGetSize(mat, &rows, &cols));
3575   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3576   PetscCall(MatGetBlockSize(mat, &bs));
3577   PetscCall(MatGetLocalSize(mat, &m, &n));
3578   mbs = m / bs;
3579   nbs = n / bs;
3580 
3581   /* read in row lengths, column indices and nonzero values */
3582   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3583   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
3584   rowidxs[0] = 0;
3585   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3586   sum = rowidxs[m];
3587   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3588 
3589   /* read in column indices and nonzero values */
3590   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
3591   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
3592   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3593 
3594   {               /* preallocate matrix storage */
3595     PetscBT   bt; /* helper bit set to count nonzeros */
3596     PetscInt *nnz;
3597     PetscBool sbaij;
3598 
3599     PetscCall(PetscBTCreate(nbs, &bt));
3600     PetscCall(PetscCalloc1(mbs, &nnz));
3601     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3602     for (i = 0; i < mbs; i++) {
3603       PetscCall(PetscBTMemzero(nbs, bt));
3604       for (k = 0; k < bs; k++) {
3605         PetscInt row = bs * i + k;
3606         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3607           PetscInt col = colidxs[j];
3608           if (!sbaij || col >= row)
3609             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3610         }
3611       }
3612     }
3613     PetscCall(PetscBTDestroy(&bt));
3614     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
3615     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
3616     PetscCall(PetscFree(nnz));
3617   }
3618 
3619   /* store matrix values */
3620   for (i = 0; i < m; i++) {
3621     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
3622     PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
3623   }
3624 
3625   PetscCall(PetscFree(rowidxs));
3626   PetscCall(PetscFree2(colidxs, matvals));
3627   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3628   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3629   PetscFunctionReturn(0);
3630 }
3631 
3632 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) {
3633   PetscBool isbinary;
3634 
3635   PetscFunctionBegin;
3636   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3637   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3638   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
3639   PetscFunctionReturn(0);
3640 }
3641 
3642 /*@C
3643    MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3644    compressed row) format.  For good matrix assembly performance the
3645    user should preallocate the matrix storage by setting the parameter nz
3646    (or the array nnz).  By setting these parameters accurately, performance
3647    during matrix assembly can be increased by more than a factor of 50.
3648 
3649    Collective
3650 
3651    Input Parameters:
3652 +  comm - MPI communicator, set to `PETSC_COMM_SELF`
3653 .  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3654           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3655 .  m - number of rows
3656 .  n - number of columns
3657 .  nz - number of nonzero blocks  per block row (same for all rows)
3658 -  nnz - array containing the number of nonzero blocks in the various block rows
3659          (possibly different for each block row) or NULL
3660 
3661    Output Parameter:
3662 .  A - the matrix
3663 
3664    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3665    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3666    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
3667 
3668    Options Database Keys:
3669 +   -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3670 -   -mat_block_size - size of the blocks to use
3671 
3672    Level: intermediate
3673 
3674    Notes:
3675    The number of rows and columns must be divisible by blocksize.
3676 
3677    If the nnz parameter is given then the nz parameter is ignored
3678 
3679    A nonzero block is any block that as 1 or more nonzeros in it
3680 
3681    The `MATSEQBAIJ` format is fully compatible with standard Fortran 77
3682    storage.  That is, the stored row and column indices can begin at
3683    either one (as in Fortran) or zero.  See the users' manual for details.
3684 
3685    Specify the preallocated storage with either nz or nnz (not both).
3686    Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
3687    allocation.  See Users-Manual: ch_mat for details.
3688    matrices.
3689 
3690 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3691 @*/
3692 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) {
3693   PetscFunctionBegin;
3694   PetscCall(MatCreate(comm, A));
3695   PetscCall(MatSetSizes(*A, m, n, m, n));
3696   PetscCall(MatSetType(*A, MATSEQBAIJ));
3697   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
3698   PetscFunctionReturn(0);
3699 }
3700 
3701 /*@C
3702    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3703    per row in the matrix. For good matrix assembly performance the
3704    user should preallocate the matrix storage by setting the parameter nz
3705    (or the array nnz).  By setting these parameters accurately, performance
3706    during matrix assembly can be increased by more than a factor of 50.
3707 
3708    Collective
3709 
3710    Input Parameters:
3711 +  B - the matrix
3712 .  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3713           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3714 .  nz - number of block nonzeros per block row (same for all rows)
3715 -  nnz - array containing the number of block nonzeros in the various block rows
3716          (possibly different for each block row) or NULL
3717 
3718    Options Database Keys:
3719 +   -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3720 -   -mat_block_size - size of the blocks to use
3721 
3722    Level: intermediate
3723 
3724    Notes:
3725    If the nnz parameter is given then the nz parameter is ignored
3726 
3727    You can call `MatGetInfo()` to get information on how effective the preallocation was;
3728    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3729    You can also run with the option -info and look for messages with the string
3730    malloc in them to see if additional memory allocation was needed.
3731 
3732    The `MATSEQBAIJ` format is fully compatible with standard Fortran 77
3733    storage.  That is, the stored row and column indices can begin at
3734    either one (as in Fortran) or zero.  See the users' manual for details.
3735 
3736    Specify the preallocated storage with either nz or nnz (not both).
3737    Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
3738    allocation.  See Users-Manual: ch_mat for details.
3739 
3740 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3741 @*/
3742 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) {
3743   PetscFunctionBegin;
3744   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3745   PetscValidType(B, 1);
3746   PetscValidLogicalCollectiveInt(B, bs, 2);
3747   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
3748   PetscFunctionReturn(0);
3749 }
3750 
3751 /*@C
3752    MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3753 
3754    Collective
3755 
3756    Input Parameters:
3757 +  B - the matrix
3758 .  i - the indices into j for the start of each local row (starts with zero)
3759 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3760 -  v - optional values in the matrix
3761 
3762    Level: advanced
3763 
3764    Notes:
3765    The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
3766    may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
3767    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3768    `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3769    block column and the second index is over columns within a block.
3770 
3771    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3772 
3773 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3774 @*/
3775 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) {
3776   PetscFunctionBegin;
3777   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3778   PetscValidType(B, 1);
3779   PetscValidLogicalCollectiveInt(B, bs, 2);
3780   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
3781   PetscFunctionReturn(0);
3782 }
3783 
3784 /*@
3785      MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3786 
3787      Collective
3788 
3789    Input Parameters:
3790 +  comm - must be an MPI communicator of size 1
3791 .  bs - size of block
3792 .  m - number of rows
3793 .  n - number of columns
3794 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3795 .  j - column indices
3796 -  a - matrix values
3797 
3798    Output Parameter:
3799 .  mat - the matrix
3800 
3801    Level: advanced
3802 
3803    Notes:
3804        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3805     once the matrix is destroyed
3806 
3807        You cannot set new nonzero locations into this matrix, that will generate an error.
3808 
3809        The i and j indices are 0 based
3810 
3811        When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3812 
3813       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3814       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3815       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3816       with column-major ordering within blocks.
3817 
3818 .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3819 @*/
3820 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) {
3821   PetscInt     ii;
3822   Mat_SeqBAIJ *baij;
3823 
3824   PetscFunctionBegin;
3825   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
3826   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3827 
3828   PetscCall(MatCreate(comm, mat));
3829   PetscCall(MatSetSizes(*mat, m, n, m, n));
3830   PetscCall(MatSetType(*mat, MATSEQBAIJ));
3831   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3832   baij = (Mat_SeqBAIJ *)(*mat)->data;
3833   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3834   PetscCall(PetscLogObjectMemory((PetscObject)*mat, 2 * m * sizeof(PetscInt)));
3835 
3836   baij->i = i;
3837   baij->j = j;
3838   baij->a = a;
3839 
3840   baij->singlemalloc = PETSC_FALSE;
3841   baij->nonew        = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3842   baij->free_a       = PETSC_FALSE;
3843   baij->free_ij      = PETSC_FALSE;
3844 
3845   for (ii = 0; ii < m; ii++) {
3846     baij->ilen[ii] = baij->imax[ii] = i[ii + 1] - i[ii];
3847     PetscCheck(i[ii + 1] - i[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, i[ii + 1] - i[ii]);
3848   }
3849   if (PetscDefined(USE_DEBUG)) {
3850     for (ii = 0; ii < baij->i[m]; ii++) {
3851       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3852       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3853     }
3854   }
3855 
3856   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3857   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3858   PetscFunctionReturn(0);
3859 }
3860 
3861 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
3862   PetscFunctionBegin;
3863   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
3864   PetscFunctionReturn(0);
3865 }
3866