xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 1fcf445a439848b2e538efa39b3f63deb630163e)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */
12 #define TYPE BAIJ
13 #define TYPE_BS
14 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
15 #undef TYPE_BS
16 #define TYPE_BS _BS
17 #define TYPE_BS_ON
18 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
19 #undef TYPE_BS
20 #include "../src/mat/impls/aij/seq/seqhashmat.h"
21 #undef TYPE
22 #undef TYPE_BS_ON
23 
24 #if defined(PETSC_HAVE_HYPRE)
25 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
26 #endif
27 
28 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
29 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
30 #endif
31 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
32 
33 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
34 {
35   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
36   PetscInt     m, n, ib, jb, bs = A->rmap->bs;
37   MatScalar   *a_val = a_aij->a;
38 
39   PetscFunctionBegin;
40   PetscCall(MatGetSize(A, &m, &n));
41   PetscCall(PetscArrayzero(reductions, n));
42   if (type == NORM_2) {
43     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
44       for (jb = 0; jb < bs; jb++) {
45         for (ib = 0; ib < bs; ib++) {
46           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
47           a_val++;
48         }
49       }
50     }
51   } else if (type == NORM_1) {
52     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
53       for (jb = 0; jb < bs; jb++) {
54         for (ib = 0; ib < bs; ib++) {
55           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
56           a_val++;
57         }
58       }
59     }
60   } else if (type == NORM_INFINITY) {
61     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
62       for (jb = 0; jb < bs; jb++) {
63         for (ib = 0; ib < bs; ib++) {
64           int col         = A->cmap->rstart + a_aij->j[i] * bs + jb;
65           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
66           a_val++;
67         }
68       }
69     }
70   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
71     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
72       for (jb = 0; jb < bs; jb++) {
73         for (ib = 0; ib < bs; ib++) {
74           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
75           a_val++;
76         }
77       }
78     }
79   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
80     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
81       for (jb = 0; jb < bs; jb++) {
82         for (ib = 0; ib < bs; ib++) {
83           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
84           a_val++;
85         }
86       }
87     }
88   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
89   if (type == NORM_2) {
90     for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
91   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
92     for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
93   }
94   PetscFunctionReturn(PETSC_SUCCESS);
95 }
96 
97 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
98 {
99   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
100   PetscInt    *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
101   MatScalar   *v     = a->a, *odiag, *diag, work[25], *v_work;
102   PetscReal    shift = 0.0;
103   PetscBool    allowzeropivot, zeropivotdetected = PETSC_FALSE;
104 
105   PetscFunctionBegin;
106   allowzeropivot = PetscNot(A->erroriffailure);
107 
108   if (a->idiagvalid) {
109     if (values) *values = a->idiag;
110     PetscFunctionReturn(PETSC_SUCCESS);
111   }
112   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
113   diag_offset = a->diag;
114   if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); }
115   diag = a->idiag;
116   if (values) *values = a->idiag;
117   /* factor and invert each block */
118   switch (bs) {
119   case 1:
120     for (i = 0; i < mbs; i++) {
121       odiag   = v + 1 * diag_offset[i];
122       diag[0] = odiag[0];
123 
124       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
125         if (allowzeropivot) {
126           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
127           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
128           A->factorerror_zeropivot_row   = i;
129           PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
130         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
131       }
132 
133       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
134       diag += 1;
135     }
136     break;
137   case 2:
138     for (i = 0; i < mbs; i++) {
139       odiag   = v + 4 * diag_offset[i];
140       diag[0] = odiag[0];
141       diag[1] = odiag[1];
142       diag[2] = odiag[2];
143       diag[3] = odiag[3];
144       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
145       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
146       diag += 4;
147     }
148     break;
149   case 3:
150     for (i = 0; i < mbs; i++) {
151       odiag   = v + 9 * diag_offset[i];
152       diag[0] = odiag[0];
153       diag[1] = odiag[1];
154       diag[2] = odiag[2];
155       diag[3] = odiag[3];
156       diag[4] = odiag[4];
157       diag[5] = odiag[5];
158       diag[6] = odiag[6];
159       diag[7] = odiag[7];
160       diag[8] = odiag[8];
161       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
162       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
163       diag += 9;
164     }
165     break;
166   case 4:
167     for (i = 0; i < mbs; i++) {
168       odiag = v + 16 * diag_offset[i];
169       PetscCall(PetscArraycpy(diag, odiag, 16));
170       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
171       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
172       diag += 16;
173     }
174     break;
175   case 5:
176     for (i = 0; i < mbs; i++) {
177       odiag = v + 25 * diag_offset[i];
178       PetscCall(PetscArraycpy(diag, odiag, 25));
179       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
180       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
181       diag += 25;
182     }
183     break;
184   case 6:
185     for (i = 0; i < mbs; i++) {
186       odiag = v + 36 * diag_offset[i];
187       PetscCall(PetscArraycpy(diag, odiag, 36));
188       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
189       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
190       diag += 36;
191     }
192     break;
193   case 7:
194     for (i = 0; i < mbs; i++) {
195       odiag = v + 49 * diag_offset[i];
196       PetscCall(PetscArraycpy(diag, odiag, 49));
197       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
198       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
199       diag += 49;
200     }
201     break;
202   default:
203     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
204     for (i = 0; i < mbs; i++) {
205       odiag = v + bs2 * diag_offset[i];
206       PetscCall(PetscArraycpy(diag, odiag, bs2));
207       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
208       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
209       diag += bs2;
210     }
211     PetscCall(PetscFree2(v_work, v_pivots));
212   }
213   a->idiagvalid = PETSC_TRUE;
214   PetscFunctionReturn(PETSC_SUCCESS);
215 }
216 
217 PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
218 {
219   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
220   PetscScalar       *x, *work, *w, *workt, *t;
221   const MatScalar   *v, *aa = a->a, *idiag;
222   const PetscScalar *b, *xb;
223   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
224   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
225   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
226 
227   PetscFunctionBegin;
228   its = its * lits;
229   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
230   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
231   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
232   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
233   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
234 
235   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
236 
237   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
238   diag  = a->diag;
239   idiag = a->idiag;
240   k     = PetscMax(A->rmap->n, A->cmap->n);
241   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
242   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
243   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
244   work = a->mult_work;
245   t    = a->sor_workt;
246   w    = a->sor_work;
247 
248   PetscCall(VecGetArray(xx, &x));
249   PetscCall(VecGetArrayRead(bb, &b));
250 
251   if (flag & SOR_ZERO_INITIAL_GUESS) {
252     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
253       switch (bs) {
254       case 1:
255         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
256         t[0] = b[0];
257         i2   = 1;
258         idiag += 1;
259         for (i = 1; i < m; i++) {
260           v    = aa + ai[i];
261           vi   = aj + ai[i];
262           nz   = diag[i] - ai[i];
263           s[0] = b[i2];
264           for (j = 0; j < nz; j++) {
265             xw[0] = x[vi[j]];
266             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
267           }
268           t[i2] = s[0];
269           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
270           x[i2] = xw[0];
271           idiag += 1;
272           i2 += 1;
273         }
274         break;
275       case 2:
276         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
277         t[0] = b[0];
278         t[1] = b[1];
279         i2   = 2;
280         idiag += 4;
281         for (i = 1; i < m; i++) {
282           v    = aa + 4 * ai[i];
283           vi   = aj + ai[i];
284           nz   = diag[i] - ai[i];
285           s[0] = b[i2];
286           s[1] = b[i2 + 1];
287           for (j = 0; j < nz; j++) {
288             idx   = 2 * vi[j];
289             it    = 4 * j;
290             xw[0] = x[idx];
291             xw[1] = x[1 + idx];
292             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
293           }
294           t[i2]     = s[0];
295           t[i2 + 1] = s[1];
296           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
297           x[i2]     = xw[0];
298           x[i2 + 1] = xw[1];
299           idiag += 4;
300           i2 += 2;
301         }
302         break;
303       case 3:
304         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
305         t[0] = b[0];
306         t[1] = b[1];
307         t[2] = b[2];
308         i2   = 3;
309         idiag += 9;
310         for (i = 1; i < m; i++) {
311           v    = aa + 9 * ai[i];
312           vi   = aj + ai[i];
313           nz   = diag[i] - ai[i];
314           s[0] = b[i2];
315           s[1] = b[i2 + 1];
316           s[2] = b[i2 + 2];
317           while (nz--) {
318             idx   = 3 * (*vi++);
319             xw[0] = x[idx];
320             xw[1] = x[1 + idx];
321             xw[2] = x[2 + idx];
322             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
323             v += 9;
324           }
325           t[i2]     = s[0];
326           t[i2 + 1] = s[1];
327           t[i2 + 2] = s[2];
328           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
329           x[i2]     = xw[0];
330           x[i2 + 1] = xw[1];
331           x[i2 + 2] = xw[2];
332           idiag += 9;
333           i2 += 3;
334         }
335         break;
336       case 4:
337         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
338         t[0] = b[0];
339         t[1] = b[1];
340         t[2] = b[2];
341         t[3] = b[3];
342         i2   = 4;
343         idiag += 16;
344         for (i = 1; i < m; i++) {
345           v    = aa + 16 * ai[i];
346           vi   = aj + ai[i];
347           nz   = diag[i] - ai[i];
348           s[0] = b[i2];
349           s[1] = b[i2 + 1];
350           s[2] = b[i2 + 2];
351           s[3] = b[i2 + 3];
352           while (nz--) {
353             idx   = 4 * (*vi++);
354             xw[0] = x[idx];
355             xw[1] = x[1 + idx];
356             xw[2] = x[2 + idx];
357             xw[3] = x[3 + idx];
358             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
359             v += 16;
360           }
361           t[i2]     = s[0];
362           t[i2 + 1] = s[1];
363           t[i2 + 2] = s[2];
364           t[i2 + 3] = s[3];
365           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
366           x[i2]     = xw[0];
367           x[i2 + 1] = xw[1];
368           x[i2 + 2] = xw[2];
369           x[i2 + 3] = xw[3];
370           idiag += 16;
371           i2 += 4;
372         }
373         break;
374       case 5:
375         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
376         t[0] = b[0];
377         t[1] = b[1];
378         t[2] = b[2];
379         t[3] = b[3];
380         t[4] = b[4];
381         i2   = 5;
382         idiag += 25;
383         for (i = 1; i < m; i++) {
384           v    = aa + 25 * ai[i];
385           vi   = aj + ai[i];
386           nz   = diag[i] - ai[i];
387           s[0] = b[i2];
388           s[1] = b[i2 + 1];
389           s[2] = b[i2 + 2];
390           s[3] = b[i2 + 3];
391           s[4] = b[i2 + 4];
392           while (nz--) {
393             idx   = 5 * (*vi++);
394             xw[0] = x[idx];
395             xw[1] = x[1 + idx];
396             xw[2] = x[2 + idx];
397             xw[3] = x[3 + idx];
398             xw[4] = x[4 + idx];
399             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
400             v += 25;
401           }
402           t[i2]     = s[0];
403           t[i2 + 1] = s[1];
404           t[i2 + 2] = s[2];
405           t[i2 + 3] = s[3];
406           t[i2 + 4] = s[4];
407           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
408           x[i2]     = xw[0];
409           x[i2 + 1] = xw[1];
410           x[i2 + 2] = xw[2];
411           x[i2 + 3] = xw[3];
412           x[i2 + 4] = xw[4];
413           idiag += 25;
414           i2 += 5;
415         }
416         break;
417       case 6:
418         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
419         t[0] = b[0];
420         t[1] = b[1];
421         t[2] = b[2];
422         t[3] = b[3];
423         t[4] = b[4];
424         t[5] = b[5];
425         i2   = 6;
426         idiag += 36;
427         for (i = 1; i < m; i++) {
428           v    = aa + 36 * ai[i];
429           vi   = aj + ai[i];
430           nz   = diag[i] - ai[i];
431           s[0] = b[i2];
432           s[1] = b[i2 + 1];
433           s[2] = b[i2 + 2];
434           s[3] = b[i2 + 3];
435           s[4] = b[i2 + 4];
436           s[5] = b[i2 + 5];
437           while (nz--) {
438             idx   = 6 * (*vi++);
439             xw[0] = x[idx];
440             xw[1] = x[1 + idx];
441             xw[2] = x[2 + idx];
442             xw[3] = x[3 + idx];
443             xw[4] = x[4 + idx];
444             xw[5] = x[5 + idx];
445             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
446             v += 36;
447           }
448           t[i2]     = s[0];
449           t[i2 + 1] = s[1];
450           t[i2 + 2] = s[2];
451           t[i2 + 3] = s[3];
452           t[i2 + 4] = s[4];
453           t[i2 + 5] = s[5];
454           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
455           x[i2]     = xw[0];
456           x[i2 + 1] = xw[1];
457           x[i2 + 2] = xw[2];
458           x[i2 + 3] = xw[3];
459           x[i2 + 4] = xw[4];
460           x[i2 + 5] = xw[5];
461           idiag += 36;
462           i2 += 6;
463         }
464         break;
465       case 7:
466         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
467         t[0] = b[0];
468         t[1] = b[1];
469         t[2] = b[2];
470         t[3] = b[3];
471         t[4] = b[4];
472         t[5] = b[5];
473         t[6] = b[6];
474         i2   = 7;
475         idiag += 49;
476         for (i = 1; i < m; i++) {
477           v    = aa + 49 * ai[i];
478           vi   = aj + ai[i];
479           nz   = diag[i] - ai[i];
480           s[0] = b[i2];
481           s[1] = b[i2 + 1];
482           s[2] = b[i2 + 2];
483           s[3] = b[i2 + 3];
484           s[4] = b[i2 + 4];
485           s[5] = b[i2 + 5];
486           s[6] = b[i2 + 6];
487           while (nz--) {
488             idx   = 7 * (*vi++);
489             xw[0] = x[idx];
490             xw[1] = x[1 + idx];
491             xw[2] = x[2 + idx];
492             xw[3] = x[3 + idx];
493             xw[4] = x[4 + idx];
494             xw[5] = x[5 + idx];
495             xw[6] = x[6 + idx];
496             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
497             v += 49;
498           }
499           t[i2]     = s[0];
500           t[i2 + 1] = s[1];
501           t[i2 + 2] = s[2];
502           t[i2 + 3] = s[3];
503           t[i2 + 4] = s[4];
504           t[i2 + 5] = s[5];
505           t[i2 + 6] = s[6];
506           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
507           x[i2]     = xw[0];
508           x[i2 + 1] = xw[1];
509           x[i2 + 2] = xw[2];
510           x[i2 + 3] = xw[3];
511           x[i2 + 4] = xw[4];
512           x[i2 + 5] = xw[5];
513           x[i2 + 6] = xw[6];
514           idiag += 49;
515           i2 += 7;
516         }
517         break;
518       default:
519         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
520         PetscCall(PetscArraycpy(t, b, bs));
521         i2 = bs;
522         idiag += bs2;
523         for (i = 1; i < m; i++) {
524           v  = aa + bs2 * ai[i];
525           vi = aj + ai[i];
526           nz = diag[i] - ai[i];
527 
528           PetscCall(PetscArraycpy(w, b + i2, bs));
529           /* copy all rows of x that are needed into contiguous space */
530           workt = work;
531           for (j = 0; j < nz; j++) {
532             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
533             workt += bs;
534           }
535           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
536           PetscCall(PetscArraycpy(t + i2, w, bs));
537           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
538 
539           idiag += bs2;
540           i2 += bs;
541         }
542         break;
543       }
544       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
545       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
546       xb = t;
547     } else xb = b;
548     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
549       idiag = a->idiag + bs2 * (a->mbs - 1);
550       i2    = bs * (m - 1);
551       switch (bs) {
552       case 1:
553         s[0] = xb[i2];
554         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
555         x[i2] = xw[0];
556         i2 -= 1;
557         for (i = m - 2; i >= 0; i--) {
558           v    = aa + (diag[i] + 1);
559           vi   = aj + diag[i] + 1;
560           nz   = ai[i + 1] - diag[i] - 1;
561           s[0] = xb[i2];
562           for (j = 0; j < nz; j++) {
563             xw[0] = x[vi[j]];
564             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
565           }
566           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
567           x[i2] = xw[0];
568           idiag -= 1;
569           i2 -= 1;
570         }
571         break;
572       case 2:
573         s[0] = xb[i2];
574         s[1] = xb[i2 + 1];
575         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
576         x[i2]     = xw[0];
577         x[i2 + 1] = xw[1];
578         i2 -= 2;
579         idiag -= 4;
580         for (i = m - 2; i >= 0; i--) {
581           v    = aa + 4 * (diag[i] + 1);
582           vi   = aj + diag[i] + 1;
583           nz   = ai[i + 1] - diag[i] - 1;
584           s[0] = xb[i2];
585           s[1] = xb[i2 + 1];
586           for (j = 0; j < nz; j++) {
587             idx   = 2 * vi[j];
588             it    = 4 * j;
589             xw[0] = x[idx];
590             xw[1] = x[1 + idx];
591             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
592           }
593           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
594           x[i2]     = xw[0];
595           x[i2 + 1] = xw[1];
596           idiag -= 4;
597           i2 -= 2;
598         }
599         break;
600       case 3:
601         s[0] = xb[i2];
602         s[1] = xb[i2 + 1];
603         s[2] = xb[i2 + 2];
604         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
605         x[i2]     = xw[0];
606         x[i2 + 1] = xw[1];
607         x[i2 + 2] = xw[2];
608         i2 -= 3;
609         idiag -= 9;
610         for (i = m - 2; i >= 0; i--) {
611           v    = aa + 9 * (diag[i] + 1);
612           vi   = aj + diag[i] + 1;
613           nz   = ai[i + 1] - diag[i] - 1;
614           s[0] = xb[i2];
615           s[1] = xb[i2 + 1];
616           s[2] = xb[i2 + 2];
617           while (nz--) {
618             idx   = 3 * (*vi++);
619             xw[0] = x[idx];
620             xw[1] = x[1 + idx];
621             xw[2] = x[2 + idx];
622             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
623             v += 9;
624           }
625           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
626           x[i2]     = xw[0];
627           x[i2 + 1] = xw[1];
628           x[i2 + 2] = xw[2];
629           idiag -= 9;
630           i2 -= 3;
631         }
632         break;
633       case 4:
634         s[0] = xb[i2];
635         s[1] = xb[i2 + 1];
636         s[2] = xb[i2 + 2];
637         s[3] = xb[i2 + 3];
638         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
639         x[i2]     = xw[0];
640         x[i2 + 1] = xw[1];
641         x[i2 + 2] = xw[2];
642         x[i2 + 3] = xw[3];
643         i2 -= 4;
644         idiag -= 16;
645         for (i = m - 2; i >= 0; i--) {
646           v    = aa + 16 * (diag[i] + 1);
647           vi   = aj + diag[i] + 1;
648           nz   = ai[i + 1] - diag[i] - 1;
649           s[0] = xb[i2];
650           s[1] = xb[i2 + 1];
651           s[2] = xb[i2 + 2];
652           s[3] = xb[i2 + 3];
653           while (nz--) {
654             idx   = 4 * (*vi++);
655             xw[0] = x[idx];
656             xw[1] = x[1 + idx];
657             xw[2] = x[2 + idx];
658             xw[3] = x[3 + idx];
659             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
660             v += 16;
661           }
662           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
663           x[i2]     = xw[0];
664           x[i2 + 1] = xw[1];
665           x[i2 + 2] = xw[2];
666           x[i2 + 3] = xw[3];
667           idiag -= 16;
668           i2 -= 4;
669         }
670         break;
671       case 5:
672         s[0] = xb[i2];
673         s[1] = xb[i2 + 1];
674         s[2] = xb[i2 + 2];
675         s[3] = xb[i2 + 3];
676         s[4] = xb[i2 + 4];
677         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
678         x[i2]     = xw[0];
679         x[i2 + 1] = xw[1];
680         x[i2 + 2] = xw[2];
681         x[i2 + 3] = xw[3];
682         x[i2 + 4] = xw[4];
683         i2 -= 5;
684         idiag -= 25;
685         for (i = m - 2; i >= 0; i--) {
686           v    = aa + 25 * (diag[i] + 1);
687           vi   = aj + diag[i] + 1;
688           nz   = ai[i + 1] - diag[i] - 1;
689           s[0] = xb[i2];
690           s[1] = xb[i2 + 1];
691           s[2] = xb[i2 + 2];
692           s[3] = xb[i2 + 3];
693           s[4] = xb[i2 + 4];
694           while (nz--) {
695             idx   = 5 * (*vi++);
696             xw[0] = x[idx];
697             xw[1] = x[1 + idx];
698             xw[2] = x[2 + idx];
699             xw[3] = x[3 + idx];
700             xw[4] = x[4 + idx];
701             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
702             v += 25;
703           }
704           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
705           x[i2]     = xw[0];
706           x[i2 + 1] = xw[1];
707           x[i2 + 2] = xw[2];
708           x[i2 + 3] = xw[3];
709           x[i2 + 4] = xw[4];
710           idiag -= 25;
711           i2 -= 5;
712         }
713         break;
714       case 6:
715         s[0] = xb[i2];
716         s[1] = xb[i2 + 1];
717         s[2] = xb[i2 + 2];
718         s[3] = xb[i2 + 3];
719         s[4] = xb[i2 + 4];
720         s[5] = xb[i2 + 5];
721         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
722         x[i2]     = xw[0];
723         x[i2 + 1] = xw[1];
724         x[i2 + 2] = xw[2];
725         x[i2 + 3] = xw[3];
726         x[i2 + 4] = xw[4];
727         x[i2 + 5] = xw[5];
728         i2 -= 6;
729         idiag -= 36;
730         for (i = m - 2; i >= 0; i--) {
731           v    = aa + 36 * (diag[i] + 1);
732           vi   = aj + diag[i] + 1;
733           nz   = ai[i + 1] - diag[i] - 1;
734           s[0] = xb[i2];
735           s[1] = xb[i2 + 1];
736           s[2] = xb[i2 + 2];
737           s[3] = xb[i2 + 3];
738           s[4] = xb[i2 + 4];
739           s[5] = xb[i2 + 5];
740           while (nz--) {
741             idx   = 6 * (*vi++);
742             xw[0] = x[idx];
743             xw[1] = x[1 + idx];
744             xw[2] = x[2 + idx];
745             xw[3] = x[3 + idx];
746             xw[4] = x[4 + idx];
747             xw[5] = x[5 + idx];
748             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
749             v += 36;
750           }
751           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
752           x[i2]     = xw[0];
753           x[i2 + 1] = xw[1];
754           x[i2 + 2] = xw[2];
755           x[i2 + 3] = xw[3];
756           x[i2 + 4] = xw[4];
757           x[i2 + 5] = xw[5];
758           idiag -= 36;
759           i2 -= 6;
760         }
761         break;
762       case 7:
763         s[0] = xb[i2];
764         s[1] = xb[i2 + 1];
765         s[2] = xb[i2 + 2];
766         s[3] = xb[i2 + 3];
767         s[4] = xb[i2 + 4];
768         s[5] = xb[i2 + 5];
769         s[6] = xb[i2 + 6];
770         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
771         x[i2]     = xw[0];
772         x[i2 + 1] = xw[1];
773         x[i2 + 2] = xw[2];
774         x[i2 + 3] = xw[3];
775         x[i2 + 4] = xw[4];
776         x[i2 + 5] = xw[5];
777         x[i2 + 6] = xw[6];
778         i2 -= 7;
779         idiag -= 49;
780         for (i = m - 2; i >= 0; i--) {
781           v    = aa + 49 * (diag[i] + 1);
782           vi   = aj + diag[i] + 1;
783           nz   = ai[i + 1] - diag[i] - 1;
784           s[0] = xb[i2];
785           s[1] = xb[i2 + 1];
786           s[2] = xb[i2 + 2];
787           s[3] = xb[i2 + 3];
788           s[4] = xb[i2 + 4];
789           s[5] = xb[i2 + 5];
790           s[6] = xb[i2 + 6];
791           while (nz--) {
792             idx   = 7 * (*vi++);
793             xw[0] = x[idx];
794             xw[1] = x[1 + idx];
795             xw[2] = x[2 + idx];
796             xw[3] = x[3 + idx];
797             xw[4] = x[4 + idx];
798             xw[5] = x[5 + idx];
799             xw[6] = x[6 + idx];
800             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
801             v += 49;
802           }
803           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
804           x[i2]     = xw[0];
805           x[i2 + 1] = xw[1];
806           x[i2 + 2] = xw[2];
807           x[i2 + 3] = xw[3];
808           x[i2 + 4] = xw[4];
809           x[i2 + 5] = xw[5];
810           x[i2 + 6] = xw[6];
811           idiag -= 49;
812           i2 -= 7;
813         }
814         break;
815       default:
816         PetscCall(PetscArraycpy(w, xb + i2, bs));
817         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
818         i2 -= bs;
819         idiag -= bs2;
820         for (i = m - 2; i >= 0; i--) {
821           v  = aa + bs2 * (diag[i] + 1);
822           vi = aj + diag[i] + 1;
823           nz = ai[i + 1] - diag[i] - 1;
824 
825           PetscCall(PetscArraycpy(w, xb + i2, bs));
826           /* copy all rows of x that are needed into contiguous space */
827           workt = work;
828           for (j = 0; j < nz; j++) {
829             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
830             workt += bs;
831           }
832           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
833           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
834 
835           idiag -= bs2;
836           i2 -= bs;
837         }
838         break;
839       }
840       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
841     }
842     its--;
843   }
844   while (its--) {
845     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
846       idiag = a->idiag;
847       i2    = 0;
848       switch (bs) {
849       case 1:
850         for (i = 0; i < m; i++) {
851           v    = aa + ai[i];
852           vi   = aj + ai[i];
853           nz   = ai[i + 1] - ai[i];
854           s[0] = b[i2];
855           for (j = 0; j < nz; j++) {
856             xw[0] = x[vi[j]];
857             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
858           }
859           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
860           x[i2] += xw[0];
861           idiag += 1;
862           i2 += 1;
863         }
864         break;
865       case 2:
866         for (i = 0; i < m; i++) {
867           v    = aa + 4 * ai[i];
868           vi   = aj + ai[i];
869           nz   = ai[i + 1] - ai[i];
870           s[0] = b[i2];
871           s[1] = b[i2 + 1];
872           for (j = 0; j < nz; j++) {
873             idx   = 2 * vi[j];
874             it    = 4 * j;
875             xw[0] = x[idx];
876             xw[1] = x[1 + idx];
877             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
878           }
879           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
880           x[i2] += xw[0];
881           x[i2 + 1] += xw[1];
882           idiag += 4;
883           i2 += 2;
884         }
885         break;
886       case 3:
887         for (i = 0; i < m; i++) {
888           v    = aa + 9 * ai[i];
889           vi   = aj + ai[i];
890           nz   = ai[i + 1] - ai[i];
891           s[0] = b[i2];
892           s[1] = b[i2 + 1];
893           s[2] = b[i2 + 2];
894           while (nz--) {
895             idx   = 3 * (*vi++);
896             xw[0] = x[idx];
897             xw[1] = x[1 + idx];
898             xw[2] = x[2 + idx];
899             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
900             v += 9;
901           }
902           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
903           x[i2] += xw[0];
904           x[i2 + 1] += xw[1];
905           x[i2 + 2] += xw[2];
906           idiag += 9;
907           i2 += 3;
908         }
909         break;
910       case 4:
911         for (i = 0; i < m; i++) {
912           v    = aa + 16 * ai[i];
913           vi   = aj + ai[i];
914           nz   = ai[i + 1] - ai[i];
915           s[0] = b[i2];
916           s[1] = b[i2 + 1];
917           s[2] = b[i2 + 2];
918           s[3] = b[i2 + 3];
919           while (nz--) {
920             idx   = 4 * (*vi++);
921             xw[0] = x[idx];
922             xw[1] = x[1 + idx];
923             xw[2] = x[2 + idx];
924             xw[3] = x[3 + idx];
925             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
926             v += 16;
927           }
928           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
929           x[i2] += xw[0];
930           x[i2 + 1] += xw[1];
931           x[i2 + 2] += xw[2];
932           x[i2 + 3] += xw[3];
933           idiag += 16;
934           i2 += 4;
935         }
936         break;
937       case 5:
938         for (i = 0; i < m; i++) {
939           v    = aa + 25 * ai[i];
940           vi   = aj + ai[i];
941           nz   = ai[i + 1] - ai[i];
942           s[0] = b[i2];
943           s[1] = b[i2 + 1];
944           s[2] = b[i2 + 2];
945           s[3] = b[i2 + 3];
946           s[4] = b[i2 + 4];
947           while (nz--) {
948             idx   = 5 * (*vi++);
949             xw[0] = x[idx];
950             xw[1] = x[1 + idx];
951             xw[2] = x[2 + idx];
952             xw[3] = x[3 + idx];
953             xw[4] = x[4 + idx];
954             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
955             v += 25;
956           }
957           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
958           x[i2] += xw[0];
959           x[i2 + 1] += xw[1];
960           x[i2 + 2] += xw[2];
961           x[i2 + 3] += xw[3];
962           x[i2 + 4] += xw[4];
963           idiag += 25;
964           i2 += 5;
965         }
966         break;
967       case 6:
968         for (i = 0; i < m; i++) {
969           v    = aa + 36 * ai[i];
970           vi   = aj + ai[i];
971           nz   = ai[i + 1] - ai[i];
972           s[0] = b[i2];
973           s[1] = b[i2 + 1];
974           s[2] = b[i2 + 2];
975           s[3] = b[i2 + 3];
976           s[4] = b[i2 + 4];
977           s[5] = b[i2 + 5];
978           while (nz--) {
979             idx   = 6 * (*vi++);
980             xw[0] = x[idx];
981             xw[1] = x[1 + idx];
982             xw[2] = x[2 + idx];
983             xw[3] = x[3 + idx];
984             xw[4] = x[4 + idx];
985             xw[5] = x[5 + idx];
986             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
987             v += 36;
988           }
989           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
990           x[i2] += xw[0];
991           x[i2 + 1] += xw[1];
992           x[i2 + 2] += xw[2];
993           x[i2 + 3] += xw[3];
994           x[i2 + 4] += xw[4];
995           x[i2 + 5] += xw[5];
996           idiag += 36;
997           i2 += 6;
998         }
999         break;
1000       case 7:
1001         for (i = 0; i < m; i++) {
1002           v    = aa + 49 * ai[i];
1003           vi   = aj + ai[i];
1004           nz   = ai[i + 1] - ai[i];
1005           s[0] = b[i2];
1006           s[1] = b[i2 + 1];
1007           s[2] = b[i2 + 2];
1008           s[3] = b[i2 + 3];
1009           s[4] = b[i2 + 4];
1010           s[5] = b[i2 + 5];
1011           s[6] = b[i2 + 6];
1012           while (nz--) {
1013             idx   = 7 * (*vi++);
1014             xw[0] = x[idx];
1015             xw[1] = x[1 + idx];
1016             xw[2] = x[2 + idx];
1017             xw[3] = x[3 + idx];
1018             xw[4] = x[4 + idx];
1019             xw[5] = x[5 + idx];
1020             xw[6] = x[6 + idx];
1021             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1022             v += 49;
1023           }
1024           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1025           x[i2] += xw[0];
1026           x[i2 + 1] += xw[1];
1027           x[i2 + 2] += xw[2];
1028           x[i2 + 3] += xw[3];
1029           x[i2 + 4] += xw[4];
1030           x[i2 + 5] += xw[5];
1031           x[i2 + 6] += xw[6];
1032           idiag += 49;
1033           i2 += 7;
1034         }
1035         break;
1036       default:
1037         for (i = 0; i < m; i++) {
1038           v  = aa + bs2 * ai[i];
1039           vi = aj + ai[i];
1040           nz = ai[i + 1] - ai[i];
1041 
1042           PetscCall(PetscArraycpy(w, b + i2, bs));
1043           /* copy all rows of x that are needed into contiguous space */
1044           workt = work;
1045           for (j = 0; j < nz; j++) {
1046             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1047             workt += bs;
1048           }
1049           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1050           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1051 
1052           idiag += bs2;
1053           i2 += bs;
1054         }
1055         break;
1056       }
1057       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1058     }
1059     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1060       idiag = a->idiag + bs2 * (a->mbs - 1);
1061       i2    = bs * (m - 1);
1062       switch (bs) {
1063       case 1:
1064         for (i = m - 1; i >= 0; i--) {
1065           v    = aa + ai[i];
1066           vi   = aj + ai[i];
1067           nz   = ai[i + 1] - ai[i];
1068           s[0] = b[i2];
1069           for (j = 0; j < nz; j++) {
1070             xw[0] = x[vi[j]];
1071             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1072           }
1073           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1074           x[i2] += xw[0];
1075           idiag -= 1;
1076           i2 -= 1;
1077         }
1078         break;
1079       case 2:
1080         for (i = m - 1; i >= 0; i--) {
1081           v    = aa + 4 * ai[i];
1082           vi   = aj + ai[i];
1083           nz   = ai[i + 1] - ai[i];
1084           s[0] = b[i2];
1085           s[1] = b[i2 + 1];
1086           for (j = 0; j < nz; j++) {
1087             idx   = 2 * vi[j];
1088             it    = 4 * j;
1089             xw[0] = x[idx];
1090             xw[1] = x[1 + idx];
1091             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1092           }
1093           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
1094           x[i2] += xw[0];
1095           x[i2 + 1] += xw[1];
1096           idiag -= 4;
1097           i2 -= 2;
1098         }
1099         break;
1100       case 3:
1101         for (i = m - 1; i >= 0; i--) {
1102           v    = aa + 9 * ai[i];
1103           vi   = aj + ai[i];
1104           nz   = ai[i + 1] - ai[i];
1105           s[0] = b[i2];
1106           s[1] = b[i2 + 1];
1107           s[2] = b[i2 + 2];
1108           while (nz--) {
1109             idx   = 3 * (*vi++);
1110             xw[0] = x[idx];
1111             xw[1] = x[1 + idx];
1112             xw[2] = x[2 + idx];
1113             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1114             v += 9;
1115           }
1116           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
1117           x[i2] += xw[0];
1118           x[i2 + 1] += xw[1];
1119           x[i2 + 2] += xw[2];
1120           idiag -= 9;
1121           i2 -= 3;
1122         }
1123         break;
1124       case 4:
1125         for (i = m - 1; i >= 0; i--) {
1126           v    = aa + 16 * ai[i];
1127           vi   = aj + ai[i];
1128           nz   = ai[i + 1] - ai[i];
1129           s[0] = b[i2];
1130           s[1] = b[i2 + 1];
1131           s[2] = b[i2 + 2];
1132           s[3] = b[i2 + 3];
1133           while (nz--) {
1134             idx   = 4 * (*vi++);
1135             xw[0] = x[idx];
1136             xw[1] = x[1 + idx];
1137             xw[2] = x[2 + idx];
1138             xw[3] = x[3 + idx];
1139             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1140             v += 16;
1141           }
1142           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
1143           x[i2] += xw[0];
1144           x[i2 + 1] += xw[1];
1145           x[i2 + 2] += xw[2];
1146           x[i2 + 3] += xw[3];
1147           idiag -= 16;
1148           i2 -= 4;
1149         }
1150         break;
1151       case 5:
1152         for (i = m - 1; i >= 0; i--) {
1153           v    = aa + 25 * ai[i];
1154           vi   = aj + ai[i];
1155           nz   = ai[i + 1] - ai[i];
1156           s[0] = b[i2];
1157           s[1] = b[i2 + 1];
1158           s[2] = b[i2 + 2];
1159           s[3] = b[i2 + 3];
1160           s[4] = b[i2 + 4];
1161           while (nz--) {
1162             idx   = 5 * (*vi++);
1163             xw[0] = x[idx];
1164             xw[1] = x[1 + idx];
1165             xw[2] = x[2 + idx];
1166             xw[3] = x[3 + idx];
1167             xw[4] = x[4 + idx];
1168             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1169             v += 25;
1170           }
1171           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
1172           x[i2] += xw[0];
1173           x[i2 + 1] += xw[1];
1174           x[i2 + 2] += xw[2];
1175           x[i2 + 3] += xw[3];
1176           x[i2 + 4] += xw[4];
1177           idiag -= 25;
1178           i2 -= 5;
1179         }
1180         break;
1181       case 6:
1182         for (i = m - 1; i >= 0; i--) {
1183           v    = aa + 36 * ai[i];
1184           vi   = aj + ai[i];
1185           nz   = ai[i + 1] - ai[i];
1186           s[0] = b[i2];
1187           s[1] = b[i2 + 1];
1188           s[2] = b[i2 + 2];
1189           s[3] = b[i2 + 3];
1190           s[4] = b[i2 + 4];
1191           s[5] = b[i2 + 5];
1192           while (nz--) {
1193             idx   = 6 * (*vi++);
1194             xw[0] = x[idx];
1195             xw[1] = x[1 + idx];
1196             xw[2] = x[2 + idx];
1197             xw[3] = x[3 + idx];
1198             xw[4] = x[4 + idx];
1199             xw[5] = x[5 + idx];
1200             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1201             v += 36;
1202           }
1203           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
1204           x[i2] += xw[0];
1205           x[i2 + 1] += xw[1];
1206           x[i2 + 2] += xw[2];
1207           x[i2 + 3] += xw[3];
1208           x[i2 + 4] += xw[4];
1209           x[i2 + 5] += xw[5];
1210           idiag -= 36;
1211           i2 -= 6;
1212         }
1213         break;
1214       case 7:
1215         for (i = m - 1; i >= 0; i--) {
1216           v    = aa + 49 * ai[i];
1217           vi   = aj + ai[i];
1218           nz   = ai[i + 1] - ai[i];
1219           s[0] = b[i2];
1220           s[1] = b[i2 + 1];
1221           s[2] = b[i2 + 2];
1222           s[3] = b[i2 + 3];
1223           s[4] = b[i2 + 4];
1224           s[5] = b[i2 + 5];
1225           s[6] = b[i2 + 6];
1226           while (nz--) {
1227             idx   = 7 * (*vi++);
1228             xw[0] = x[idx];
1229             xw[1] = x[1 + idx];
1230             xw[2] = x[2 + idx];
1231             xw[3] = x[3 + idx];
1232             xw[4] = x[4 + idx];
1233             xw[5] = x[5 + idx];
1234             xw[6] = x[6 + idx];
1235             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1236             v += 49;
1237           }
1238           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1239           x[i2] += xw[0];
1240           x[i2 + 1] += xw[1];
1241           x[i2 + 2] += xw[2];
1242           x[i2 + 3] += xw[3];
1243           x[i2 + 4] += xw[4];
1244           x[i2 + 5] += xw[5];
1245           x[i2 + 6] += xw[6];
1246           idiag -= 49;
1247           i2 -= 7;
1248         }
1249         break;
1250       default:
1251         for (i = m - 1; i >= 0; i--) {
1252           v  = aa + bs2 * ai[i];
1253           vi = aj + ai[i];
1254           nz = ai[i + 1] - ai[i];
1255 
1256           PetscCall(PetscArraycpy(w, b + i2, bs));
1257           /* copy all rows of x that are needed into contiguous space */
1258           workt = work;
1259           for (j = 0; j < nz; j++) {
1260             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1261             workt += bs;
1262           }
1263           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1264           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1265 
1266           idiag -= bs2;
1267           i2 -= bs;
1268         }
1269         break;
1270       }
1271       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1272     }
1273   }
1274   PetscCall(VecRestoreArray(xx, &x));
1275   PetscCall(VecRestoreArrayRead(bb, &b));
1276   PetscFunctionReturn(PETSC_SUCCESS);
1277 }
1278 
1279 /*
1280     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1281 */
1282 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1283   #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1284 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1285   #define matsetvaluesblocked4_ matsetvaluesblocked4
1286 #endif
1287 
1288 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1289 {
1290   Mat                A = *AA;
1291   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1292   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1293   PetscInt          *ai = a->i, *ailen = a->ilen;
1294   PetscInt          *aj = a->j, stepval, lastcol = -1;
1295   const PetscScalar *value = v;
1296   MatScalar         *ap, *aa = a->a, *bap;
1297 
1298   PetscFunctionBegin;
1299   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1300   stepval = (n - 1) * 4;
1301   for (k = 0; k < m; k++) { /* loop over added rows */
1302     row  = im[k];
1303     rp   = aj + ai[row];
1304     ap   = aa + 16 * ai[row];
1305     nrow = ailen[row];
1306     low  = 0;
1307     high = nrow;
1308     for (l = 0; l < n; l++) { /* loop over added columns */
1309       col = in[l];
1310       if (col <= lastcol) low = 0;
1311       else high = nrow;
1312       lastcol = col;
1313       value   = v + k * (stepval + 4 + l) * 4;
1314       while (high - low > 7) {
1315         t = (low + high) / 2;
1316         if (rp[t] > col) high = t;
1317         else low = t;
1318       }
1319       for (i = low; i < high; i++) {
1320         if (rp[i] > col) break;
1321         if (rp[i] == col) {
1322           bap = ap + 16 * i;
1323           for (ii = 0; ii < 4; ii++, value += stepval) {
1324             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1325           }
1326           goto noinsert2;
1327         }
1328       }
1329       N = nrow++ - 1;
1330       high++; /* added new column index thus must search to one higher than before */
1331       /* shift up all the later entries in this row */
1332       for (ii = N; ii >= i; ii--) {
1333         rp[ii + 1] = rp[ii];
1334         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1335       }
1336       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1337       rp[i] = col;
1338       bap   = ap + 16 * i;
1339       for (ii = 0; ii < 4; ii++, value += stepval) {
1340         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1341       }
1342     noinsert2:;
1343       low = i;
1344     }
1345     ailen[row] = nrow;
1346   }
1347   PetscFunctionReturnVoid();
1348 }
1349 
1350 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1351   #define matsetvalues4_ MATSETVALUES4
1352 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1353   #define matsetvalues4_ matsetvalues4
1354 #endif
1355 
1356 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1357 {
1358   Mat          A = *AA;
1359   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1360   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1361   PetscInt    *ai = a->i, *ailen = a->ilen;
1362   PetscInt    *aj = a->j, brow, bcol;
1363   PetscInt     ridx, cidx, lastcol = -1;
1364   MatScalar   *ap, value, *aa      = a->a, *bap;
1365 
1366   PetscFunctionBegin;
1367   for (k = 0; k < m; k++) { /* loop over added rows */
1368     row  = im[k];
1369     brow = row / 4;
1370     rp   = aj + ai[brow];
1371     ap   = aa + 16 * ai[brow];
1372     nrow = ailen[brow];
1373     low  = 0;
1374     high = nrow;
1375     for (l = 0; l < n; l++) { /* loop over added columns */
1376       col   = in[l];
1377       bcol  = col / 4;
1378       ridx  = row % 4;
1379       cidx  = col % 4;
1380       value = v[l + k * n];
1381       if (col <= lastcol) low = 0;
1382       else high = nrow;
1383       lastcol = col;
1384       while (high - low > 7) {
1385         t = (low + high) / 2;
1386         if (rp[t] > bcol) high = t;
1387         else low = t;
1388       }
1389       for (i = low; i < high; i++) {
1390         if (rp[i] > bcol) break;
1391         if (rp[i] == bcol) {
1392           bap = ap + 16 * i + 4 * cidx + ridx;
1393           *bap += value;
1394           goto noinsert1;
1395         }
1396       }
1397       N = nrow++ - 1;
1398       high++; /* added new column thus must search to one higher than before */
1399       /* shift up all the later entries in this row */
1400       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
1401       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
1402       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1403       rp[i]                        = bcol;
1404       ap[16 * i + 4 * cidx + ridx] = value;
1405     noinsert1:;
1406       low = i;
1407     }
1408     ailen[brow] = nrow;
1409   }
1410   PetscFunctionReturnVoid();
1411 }
1412 
1413 /*
1414      Checks for missing diagonals
1415 */
1416 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1417 {
1418   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1419   PetscInt    *diag, *ii = a->i, i;
1420 
1421   PetscFunctionBegin;
1422   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
1423   *missing = PETSC_FALSE;
1424   if (A->rmap->n > 0 && !ii) {
1425     *missing = PETSC_TRUE;
1426     if (d) *d = 0;
1427     PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
1428   } else {
1429     PetscInt n;
1430     n    = PetscMin(a->mbs, a->nbs);
1431     diag = a->diag;
1432     for (i = 0; i < n; i++) {
1433       if (diag[i] >= ii[i + 1]) {
1434         *missing = PETSC_TRUE;
1435         if (d) *d = i;
1436         PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i));
1437         break;
1438       }
1439     }
1440   }
1441   PetscFunctionReturn(PETSC_SUCCESS);
1442 }
1443 
1444 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1445 {
1446   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1447   PetscInt     i, j, m = a->mbs;
1448 
1449   PetscFunctionBegin;
1450   if (!a->diag) {
1451     PetscCall(PetscMalloc1(m, &a->diag));
1452     a->free_diag = PETSC_TRUE;
1453   }
1454   for (i = 0; i < m; i++) {
1455     a->diag[i] = a->i[i + 1];
1456     for (j = a->i[i]; j < a->i[i + 1]; j++) {
1457       if (a->j[j] == i) {
1458         a->diag[i] = j;
1459         break;
1460       }
1461     }
1462   }
1463   PetscFunctionReturn(PETSC_SUCCESS);
1464 }
1465 
1466 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1467 {
1468   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1469   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
1470   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
1471 
1472   PetscFunctionBegin;
1473   *nn = n;
1474   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1475   if (symmetric) {
1476     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1477     nz = tia[n];
1478   } else {
1479     tia = a->i;
1480     tja = a->j;
1481   }
1482 
1483   if (!blockcompressed && bs > 1) {
1484     (*nn) *= bs;
1485     /* malloc & create the natural set of indices */
1486     PetscCall(PetscMalloc1((n + 1) * bs, ia));
1487     if (n) {
1488       (*ia)[0] = oshift;
1489       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
1490     }
1491 
1492     for (i = 1; i < n; i++) {
1493       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1494       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
1495     }
1496     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1497 
1498     if (inja) {
1499       PetscCall(PetscMalloc1(nz * bs * bs, ja));
1500       cnt = 0;
1501       for (i = 0; i < n; i++) {
1502         for (j = 0; j < bs; j++) {
1503           for (k = tia[i]; k < tia[i + 1]; k++) {
1504             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
1505           }
1506         }
1507       }
1508     }
1509 
1510     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1511       PetscCall(PetscFree(tia));
1512       PetscCall(PetscFree(tja));
1513     }
1514   } else if (oshift == 1) {
1515     if (symmetric) {
1516       nz = tia[A->rmap->n / bs];
1517       /*  add 1 to i and j indices */
1518       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1519       *ia = tia;
1520       if (ja) {
1521         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1522         *ja = tja;
1523       }
1524     } else {
1525       nz = a->i[A->rmap->n / bs];
1526       /* malloc space and  add 1 to i and j indices */
1527       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1528       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1529       if (ja) {
1530         PetscCall(PetscMalloc1(nz, ja));
1531         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1532       }
1533     }
1534   } else {
1535     *ia = tia;
1536     if (ja) *ja = tja;
1537   }
1538   PetscFunctionReturn(PETSC_SUCCESS);
1539 }
1540 
1541 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1542 {
1543   PetscFunctionBegin;
1544   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1545   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1546     PetscCall(PetscFree(*ia));
1547     if (ja) PetscCall(PetscFree(*ja));
1548   }
1549   PetscFunctionReturn(PETSC_SUCCESS);
1550 }
1551 
1552 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1553 {
1554   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1555 
1556   PetscFunctionBegin;
1557 #if defined(PETSC_USE_LOG)
1558   PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
1559 #endif
1560   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
1561   PetscCall(ISDestroy(&a->row));
1562   PetscCall(ISDestroy(&a->col));
1563   if (a->free_diag) PetscCall(PetscFree(a->diag));
1564   PetscCall(PetscFree(a->idiag));
1565   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
1566   PetscCall(PetscFree(a->solve_work));
1567   PetscCall(PetscFree(a->mult_work));
1568   PetscCall(PetscFree(a->sor_workt));
1569   PetscCall(PetscFree(a->sor_work));
1570   PetscCall(ISDestroy(&a->icol));
1571   PetscCall(PetscFree(a->saved_values));
1572   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1573 
1574   PetscCall(MatDestroy(&a->sbaijMat));
1575   PetscCall(MatDestroy(&a->parent));
1576   PetscCall(PetscFree(A->data));
1577 
1578   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
1579   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
1580   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
1581   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
1582   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
1583   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
1584   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
1585   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
1586   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
1587   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
1588   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
1589   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
1590 #if defined(PETSC_HAVE_HYPRE)
1591   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
1592 #endif
1593   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
1594   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1595   PetscFunctionReturn(PETSC_SUCCESS);
1596 }
1597 
1598 PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1599 {
1600   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1601 
1602   PetscFunctionBegin;
1603   switch (op) {
1604   case MAT_ROW_ORIENTED:
1605     a->roworiented = flg;
1606     break;
1607   case MAT_KEEP_NONZERO_PATTERN:
1608     a->keepnonzeropattern = flg;
1609     break;
1610   case MAT_NEW_NONZERO_LOCATIONS:
1611     a->nonew = (flg ? 0 : 1);
1612     break;
1613   case MAT_NEW_NONZERO_LOCATION_ERR:
1614     a->nonew = (flg ? -1 : 0);
1615     break;
1616   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1617     a->nonew = (flg ? -2 : 0);
1618     break;
1619   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1620     a->nounused = (flg ? -1 : 0);
1621     break;
1622   case MAT_FORCE_DIAGONAL_ENTRIES:
1623   case MAT_IGNORE_OFF_PROC_ENTRIES:
1624   case MAT_USE_HASH_TABLE:
1625   case MAT_SORTED_FULL:
1626     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1627     break;
1628   case MAT_SPD:
1629   case MAT_SYMMETRIC:
1630   case MAT_STRUCTURALLY_SYMMETRIC:
1631   case MAT_HERMITIAN:
1632   case MAT_SYMMETRY_ETERNAL:
1633   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1634   case MAT_SUBMAT_SINGLEIS:
1635   case MAT_STRUCTURE_ONLY:
1636   case MAT_SPD_ETERNAL:
1637     /* if the diagonal matrix is square it inherits some of the properties above */
1638     break;
1639   default:
1640     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1641   }
1642   PetscFunctionReturn(PETSC_SUCCESS);
1643 }
1644 
1645 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1646 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1647 {
1648   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
1649   MatScalar   *aa_i;
1650   PetscScalar *v_i;
1651 
1652   PetscFunctionBegin;
1653   bs  = A->rmap->bs;
1654   bs2 = bs * bs;
1655   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1656 
1657   bn  = row / bs; /* Block number */
1658   bp  = row % bs; /* Block Position */
1659   M   = ai[bn + 1] - ai[bn];
1660   *nz = bs * M;
1661 
1662   if (v) {
1663     *v = NULL;
1664     if (*nz) {
1665       PetscCall(PetscMalloc1(*nz, v));
1666       for (i = 0; i < M; i++) { /* for each block in the block row */
1667         v_i  = *v + i * bs;
1668         aa_i = aa + bs2 * (ai[bn] + i);
1669         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
1670       }
1671     }
1672   }
1673 
1674   if (idx) {
1675     *idx = NULL;
1676     if (*nz) {
1677       PetscCall(PetscMalloc1(*nz, idx));
1678       for (i = 0; i < M; i++) { /* for each block in the block row */
1679         idx_i = *idx + i * bs;
1680         itmp  = bs * aj[ai[bn] + i];
1681         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
1682       }
1683     }
1684   }
1685   PetscFunctionReturn(PETSC_SUCCESS);
1686 }
1687 
1688 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1689 {
1690   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1691 
1692   PetscFunctionBegin;
1693   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
1694   PetscFunctionReturn(PETSC_SUCCESS);
1695 }
1696 
1697 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1698 {
1699   PetscFunctionBegin;
1700   if (nz) *nz = 0;
1701   if (idx) PetscCall(PetscFree(*idx));
1702   if (v) PetscCall(PetscFree(*v));
1703   PetscFunctionReturn(PETSC_SUCCESS);
1704 }
1705 
1706 PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1707 {
1708   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
1709   Mat          C;
1710   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
1711   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
1712   MatScalar   *ata, *aa = a->a;
1713 
1714   PetscFunctionBegin;
1715   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1716   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1717   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1718     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1719 
1720     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
1721     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
1722     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
1723     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
1724 
1725     at  = (Mat_SeqBAIJ *)C->data;
1726     ati = at->i;
1727     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1728   } else {
1729     C   = *B;
1730     at  = (Mat_SeqBAIJ *)C->data;
1731     ati = at->i;
1732   }
1733 
1734   atj = at->j;
1735   ata = at->a;
1736 
1737   /* Copy ati into atfill so we have locations of the next free space in atj */
1738   PetscCall(PetscArraycpy(atfill, ati, nbs));
1739 
1740   /* Walk through A row-wise and mark nonzero entries of A^T. */
1741   for (i = 0; i < mbs; i++) {
1742     anzj = ai[i + 1] - ai[i];
1743     for (j = 0; j < anzj; j++) {
1744       atj[atfill[*aj]] = i;
1745       for (kr = 0; kr < bs; kr++) {
1746         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
1747       }
1748       atfill[*aj++] += 1;
1749     }
1750   }
1751   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1752   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1753 
1754   /* Clean up temporary space and complete requests. */
1755   PetscCall(PetscFree(atfill));
1756 
1757   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1758     PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1759     *B = C;
1760   } else {
1761     PetscCall(MatHeaderMerge(A, &C));
1762   }
1763   PetscFunctionReturn(PETSC_SUCCESS);
1764 }
1765 
1766 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1767 {
1768   Mat Btrans;
1769 
1770   PetscFunctionBegin;
1771   *f = PETSC_FALSE;
1772   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
1773   PetscCall(MatEqual_SeqBAIJ(B, Btrans, f));
1774   PetscCall(MatDestroy(&Btrans));
1775   PetscFunctionReturn(PETSC_SUCCESS);
1776 }
1777 
1778 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1779 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1780 {
1781   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1782   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1783   PetscInt    *rowlens, *colidxs;
1784   PetscScalar *matvals;
1785 
1786   PetscFunctionBegin;
1787   PetscCall(PetscViewerSetUp(viewer));
1788 
1789   M  = mat->rmap->N;
1790   N  = mat->cmap->N;
1791   m  = mat->rmap->n;
1792   bs = mat->rmap->bs;
1793   nz = bs * bs * A->nz;
1794 
1795   /* write matrix header */
1796   header[0] = MAT_FILE_CLASSID;
1797   header[1] = M;
1798   header[2] = N;
1799   header[3] = nz;
1800   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1801 
1802   /* store row lengths */
1803   PetscCall(PetscMalloc1(m, &rowlens));
1804   for (cnt = 0, i = 0; i < A->mbs; i++)
1805     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
1806   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
1807   PetscCall(PetscFree(rowlens));
1808 
1809   /* store column indices  */
1810   PetscCall(PetscMalloc1(nz, &colidxs));
1811   for (cnt = 0, i = 0; i < A->mbs; i++)
1812     for (k = 0; k < bs; k++)
1813       for (j = A->i[i]; j < A->i[i + 1]; j++)
1814         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
1815   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1816   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
1817   PetscCall(PetscFree(colidxs));
1818 
1819   /* store nonzero values */
1820   PetscCall(PetscMalloc1(nz, &matvals));
1821   for (cnt = 0, i = 0; i < A->mbs; i++)
1822     for (k = 0; k < bs; k++)
1823       for (j = A->i[i]; j < A->i[i + 1]; j++)
1824         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
1825   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1826   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
1827   PetscCall(PetscFree(matvals));
1828 
1829   /* write block size option to the viewer's .info file */
1830   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1831   PetscFunctionReturn(PETSC_SUCCESS);
1832 }
1833 
1834 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1835 {
1836   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1837   PetscInt     i, bs = A->rmap->bs, k;
1838 
1839   PetscFunctionBegin;
1840   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1841   for (i = 0; i < a->mbs; i++) {
1842     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
1843     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
1844     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1845   }
1846   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1847   PetscFunctionReturn(PETSC_SUCCESS);
1848 }
1849 
1850 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1851 {
1852   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1853   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1854   PetscViewerFormat format;
1855 
1856   PetscFunctionBegin;
1857   if (A->structure_only) {
1858     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
1859     PetscFunctionReturn(PETSC_SUCCESS);
1860   }
1861 
1862   PetscCall(PetscViewerGetFormat(viewer, &format));
1863   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1864     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1865   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1866     const char *matname;
1867     Mat         aij;
1868     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
1869     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
1870     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
1871     PetscCall(MatView(aij, viewer));
1872     PetscCall(MatDestroy(&aij));
1873   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1874     PetscFunctionReturn(PETSC_SUCCESS);
1875   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1876     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1877     for (i = 0; i < a->mbs; i++) {
1878       for (j = 0; j < bs; j++) {
1879         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1880         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1881           for (l = 0; l < bs; l++) {
1882 #if defined(PETSC_USE_COMPLEX)
1883             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1884               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1885             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1886               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1887             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1888               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1889             }
1890 #else
1891             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1892 #endif
1893           }
1894         }
1895         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1896       }
1897     }
1898     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1899   } else {
1900     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1901     for (i = 0; i < a->mbs; i++) {
1902       for (j = 0; j < bs; j++) {
1903         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1904         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1905           for (l = 0; l < bs; l++) {
1906 #if defined(PETSC_USE_COMPLEX)
1907             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
1908               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1909             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
1910               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1911             } else {
1912               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1913             }
1914 #else
1915             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1916 #endif
1917           }
1918         }
1919         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1920       }
1921     }
1922     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1923   }
1924   PetscCall(PetscViewerFlush(viewer));
1925   PetscFunctionReturn(PETSC_SUCCESS);
1926 }
1927 
1928 #include <petscdraw.h>
1929 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1930 {
1931   Mat               A = (Mat)Aa;
1932   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1933   PetscInt          row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2;
1934   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1935   MatScalar        *aa;
1936   PetscViewer       viewer;
1937   PetscViewerFormat format;
1938 
1939   PetscFunctionBegin;
1940   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1941   PetscCall(PetscViewerGetFormat(viewer, &format));
1942   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
1943 
1944   /* loop over matrix elements drawing boxes */
1945 
1946   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1947     PetscDrawCollectiveBegin(draw);
1948     /* Blue for negative, Cyan for zero and  Red for positive */
1949     color = PETSC_DRAW_BLUE;
1950     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1951       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1952         y_l = A->rmap->N - row - 1.0;
1953         y_r = y_l + 1.0;
1954         x_l = a->j[j] * bs;
1955         x_r = x_l + 1.0;
1956         aa  = a->a + j * bs2;
1957         for (k = 0; k < bs; k++) {
1958           for (l = 0; l < bs; l++) {
1959             if (PetscRealPart(*aa++) >= 0.) continue;
1960             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1961           }
1962         }
1963       }
1964     }
1965     color = PETSC_DRAW_CYAN;
1966     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1967       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1968         y_l = A->rmap->N - row - 1.0;
1969         y_r = y_l + 1.0;
1970         x_l = a->j[j] * bs;
1971         x_r = x_l + 1.0;
1972         aa  = a->a + j * bs2;
1973         for (k = 0; k < bs; k++) {
1974           for (l = 0; l < bs; l++) {
1975             if (PetscRealPart(*aa++) != 0.) continue;
1976             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1977           }
1978         }
1979       }
1980     }
1981     color = PETSC_DRAW_RED;
1982     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1983       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1984         y_l = A->rmap->N - row - 1.0;
1985         y_r = y_l + 1.0;
1986         x_l = a->j[j] * bs;
1987         x_r = x_l + 1.0;
1988         aa  = a->a + j * bs2;
1989         for (k = 0; k < bs; k++) {
1990           for (l = 0; l < bs; l++) {
1991             if (PetscRealPart(*aa++) <= 0.) continue;
1992             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1993           }
1994         }
1995       }
1996     }
1997     PetscDrawCollectiveEnd(draw);
1998   } else {
1999     /* use contour shading to indicate magnitude of values */
2000     /* first determine max of all nonzero values */
2001     PetscReal minv = 0.0, maxv = 0.0;
2002     PetscDraw popup;
2003 
2004     for (i = 0; i < a->nz * a->bs2; i++) {
2005       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
2006     }
2007     if (minv >= maxv) maxv = minv + PETSC_SMALL;
2008     PetscCall(PetscDrawGetPopup(draw, &popup));
2009     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
2010 
2011     PetscDrawCollectiveBegin(draw);
2012     for (i = 0, row = 0; i < mbs; i++, row += bs) {
2013       for (j = a->i[i]; j < a->i[i + 1]; j++) {
2014         y_l = A->rmap->N - row - 1.0;
2015         y_r = y_l + 1.0;
2016         x_l = a->j[j] * bs;
2017         x_r = x_l + 1.0;
2018         aa  = a->a + j * bs2;
2019         for (k = 0; k < bs; k++) {
2020           for (l = 0; l < bs; l++) {
2021             MatScalar v = *aa++;
2022             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
2023             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2024           }
2025         }
2026       }
2027     }
2028     PetscDrawCollectiveEnd(draw);
2029   }
2030   PetscFunctionReturn(PETSC_SUCCESS);
2031 }
2032 
2033 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2034 {
2035   PetscReal xl, yl, xr, yr, w, h;
2036   PetscDraw draw;
2037   PetscBool isnull;
2038 
2039   PetscFunctionBegin;
2040   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
2041   PetscCall(PetscDrawIsNull(draw, &isnull));
2042   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
2043 
2044   xr = A->cmap->n;
2045   yr = A->rmap->N;
2046   h  = yr / 10.0;
2047   w  = xr / 10.0;
2048   xr += w;
2049   yr += h;
2050   xl = -w;
2051   yl = -h;
2052   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
2053   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
2054   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
2055   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
2056   PetscCall(PetscDrawSave(draw));
2057   PetscFunctionReturn(PETSC_SUCCESS);
2058 }
2059 
2060 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2061 {
2062   PetscBool iascii, isbinary, isdraw;
2063 
2064   PetscFunctionBegin;
2065   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2066   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2067   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
2068   if (iascii) {
2069     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
2070   } else if (isbinary) {
2071     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
2072   } else if (isdraw) {
2073     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
2074   } else {
2075     Mat B;
2076     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2077     PetscCall(MatView(B, viewer));
2078     PetscCall(MatDestroy(&B));
2079   }
2080   PetscFunctionReturn(PETSC_SUCCESS);
2081 }
2082 
2083 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2084 {
2085   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2086   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2087   PetscInt    *ai = a->i, *ailen = a->ilen;
2088   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
2089   MatScalar   *ap, *aa = a->a;
2090 
2091   PetscFunctionBegin;
2092   for (k = 0; k < m; k++) { /* loop over rows */
2093     row  = im[k];
2094     brow = row / bs;
2095     if (row < 0) {
2096       v += n;
2097       continue;
2098     } /* negative row */
2099     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2100     rp   = aj ? aj + ai[brow] : NULL;       /* mustn't add to NULL, that is UB */
2101     ap   = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */
2102     nrow = ailen[brow];
2103     for (l = 0; l < n; l++) { /* loop over columns */
2104       if (in[l] < 0) {
2105         v++;
2106         continue;
2107       } /* negative column */
2108       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
2109       col  = in[l];
2110       bcol = col / bs;
2111       cidx = col % bs;
2112       ridx = row % bs;
2113       high = nrow;
2114       low  = 0; /* assume unsorted */
2115       while (high - low > 5) {
2116         t = (low + high) / 2;
2117         if (rp[t] > bcol) high = t;
2118         else low = t;
2119       }
2120       for (i = low; i < high; i++) {
2121         if (rp[i] > bcol) break;
2122         if (rp[i] == bcol) {
2123           *v++ = ap[bs2 * i + bs * cidx + ridx];
2124           goto finished;
2125         }
2126       }
2127       *v++ = 0.0;
2128     finished:;
2129     }
2130   }
2131   PetscFunctionReturn(PETSC_SUCCESS);
2132 }
2133 
2134 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2135 {
2136   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2137   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2138   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2139   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2140   PetscBool          roworiented = a->roworiented;
2141   const PetscScalar *value       = v;
2142   MatScalar         *ap = NULL, *aa = a->a, *bap;
2143 
2144   PetscFunctionBegin;
2145   if (roworiented) {
2146     stepval = (n - 1) * bs;
2147   } else {
2148     stepval = (m - 1) * bs;
2149   }
2150   for (k = 0; k < m; k++) { /* loop over added rows */
2151     row = im[k];
2152     if (row < 0) continue;
2153     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
2154     rp = aj + ai[row];
2155     if (!A->structure_only) ap = aa + bs2 * ai[row];
2156     rmax = imax[row];
2157     nrow = ailen[row];
2158     low  = 0;
2159     high = nrow;
2160     for (l = 0; l < n; l++) { /* loop over added columns */
2161       if (in[l] < 0) continue;
2162       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
2163       col = in[l];
2164       if (!A->structure_only) {
2165         if (roworiented) {
2166           value = v + (k * (stepval + bs) + l) * bs;
2167         } else {
2168           value = v + (l * (stepval + bs) + k) * bs;
2169         }
2170       }
2171       if (col <= lastcol) low = 0;
2172       else high = nrow;
2173       lastcol = col;
2174       while (high - low > 7) {
2175         t = (low + high) / 2;
2176         if (rp[t] > col) high = t;
2177         else low = t;
2178       }
2179       for (i = low; i < high; i++) {
2180         if (rp[i] > col) break;
2181         if (rp[i] == col) {
2182           if (A->structure_only) goto noinsert2;
2183           bap = ap + bs2 * i;
2184           if (roworiented) {
2185             if (is == ADD_VALUES) {
2186               for (ii = 0; ii < bs; ii++, value += stepval) {
2187                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2188               }
2189             } else {
2190               for (ii = 0; ii < bs; ii++, value += stepval) {
2191                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2192               }
2193             }
2194           } else {
2195             if (is == ADD_VALUES) {
2196               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2197                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
2198                 bap += bs;
2199               }
2200             } else {
2201               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2202                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
2203                 bap += bs;
2204               }
2205             }
2206           }
2207           goto noinsert2;
2208         }
2209       }
2210       if (nonew == 1) goto noinsert2;
2211       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2212       if (A->structure_only) {
2213         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
2214       } else {
2215         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2216       }
2217       N = nrow++ - 1;
2218       high++;
2219       /* shift up all the later entries in this row */
2220       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2221       rp[i] = col;
2222       if (!A->structure_only) {
2223         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2224         bap = ap + bs2 * i;
2225         if (roworiented) {
2226           for (ii = 0; ii < bs; ii++, value += stepval) {
2227             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2228           }
2229         } else {
2230           for (ii = 0; ii < bs; ii++, value += stepval) {
2231             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2232           }
2233         }
2234       }
2235     noinsert2:;
2236       low = i;
2237     }
2238     ailen[row] = nrow;
2239   }
2240   PetscFunctionReturn(PETSC_SUCCESS);
2241 }
2242 
2243 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2244 {
2245   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2246   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2247   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2248   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2249   MatScalar   *aa    = a->a, *ap;
2250   PetscReal    ratio = 0.6;
2251 
2252   PetscFunctionBegin;
2253   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
2254 
2255   if (m) rmax = ailen[0];
2256   for (i = 1; i < mbs; i++) {
2257     /* move each row back by the amount of empty slots (fshift) before it*/
2258     fshift += imax[i - 1] - ailen[i - 1];
2259     rmax = PetscMax(rmax, ailen[i]);
2260     if (fshift) {
2261       ip = aj + ai[i];
2262       ap = aa + bs2 * ai[i];
2263       N  = ailen[i];
2264       PetscCall(PetscArraymove(ip - fshift, ip, N));
2265       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2266     }
2267     ai[i] = ai[i - 1] + ailen[i - 1];
2268   }
2269   if (mbs) {
2270     fshift += imax[mbs - 1] - ailen[mbs - 1];
2271     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2272   }
2273 
2274   /* reset ilen and imax for each row */
2275   a->nonzerorowcnt = 0;
2276   if (A->structure_only) {
2277     PetscCall(PetscFree2(a->imax, a->ilen));
2278   } else { /* !A->structure_only */
2279     for (i = 0; i < mbs; i++) {
2280       ailen[i] = imax[i] = ai[i + 1] - ai[i];
2281       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2282     }
2283   }
2284   a->nz = ai[mbs];
2285 
2286   /* diagonals may have moved, so kill the diagonal pointers */
2287   a->idiagvalid = PETSC_FALSE;
2288   if (fshift && a->diag) PetscCall(PetscFree(a->diag));
2289   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
2290   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
2291   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
2292   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
2293 
2294   A->info.mallocs += a->reallocs;
2295   a->reallocs         = 0;
2296   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2297   a->rmax             = rmax;
2298 
2299   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
2300   PetscFunctionReturn(PETSC_SUCCESS);
2301 }
2302 
2303 /*
2304    This function returns an array of flags which indicate the locations of contiguous
2305    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2306    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2307    Assume: sizes should be long enough to hold all the values.
2308 */
2309 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2310 {
2311   PetscInt j = 0;
2312 
2313   PetscFunctionBegin;
2314   for (PetscInt i = 0; i < n; j++) {
2315     PetscInt row = idx[i];
2316     if (row % bs != 0) { /* Not the beginning of a block */
2317       sizes[j] = 1;
2318       i++;
2319     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2320       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2321       i++;
2322     } else { /* Beginning of the block, so check if the complete block exists */
2323       PetscBool flg = PETSC_TRUE;
2324       for (PetscInt k = 1; k < bs; k++) {
2325         if (row + k != idx[i + k]) { /* break in the block */
2326           flg = PETSC_FALSE;
2327           break;
2328         }
2329       }
2330       if (flg) { /* No break in the bs */
2331         sizes[j] = bs;
2332         i += bs;
2333       } else {
2334         sizes[j] = 1;
2335         i++;
2336       }
2337     }
2338   }
2339   *bs_max = j;
2340   PetscFunctionReturn(PETSC_SUCCESS);
2341 }
2342 
2343 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2344 {
2345   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2346   PetscInt           i, j, k, count, *rows;
2347   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
2348   PetscScalar        zero = 0.0;
2349   MatScalar         *aa;
2350   const PetscScalar *xx;
2351   PetscScalar       *bb;
2352 
2353   PetscFunctionBegin;
2354   /* fix right hand side if needed */
2355   if (x && b) {
2356     PetscCall(VecGetArrayRead(x, &xx));
2357     PetscCall(VecGetArray(b, &bb));
2358     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
2359     PetscCall(VecRestoreArrayRead(x, &xx));
2360     PetscCall(VecRestoreArray(b, &bb));
2361   }
2362 
2363   /* Make a copy of the IS and  sort it */
2364   /* allocate memory for rows,sizes */
2365   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2366 
2367   /* copy IS values to rows, and sort them */
2368   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
2369   PetscCall(PetscSortInt(is_n, rows));
2370 
2371   if (baij->keepnonzeropattern) {
2372     for (i = 0; i < is_n; i++) sizes[i] = 1;
2373     bs_max = is_n;
2374   } else {
2375     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2376     A->nonzerostate++;
2377   }
2378 
2379   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2380     row = rows[j];
2381     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2382     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2383     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
2384     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2385       if (diag != (PetscScalar)0.0) {
2386         if (baij->ilen[row / bs] > 0) {
2387           baij->ilen[row / bs]       = 1;
2388           baij->j[baij->i[row / bs]] = row / bs;
2389 
2390           PetscCall(PetscArrayzero(aa, count * bs));
2391         }
2392         /* Now insert all the diagonal values for this bs */
2393         for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES));
2394       } else { /* (diag == 0.0) */
2395         baij->ilen[row / bs] = 0;
2396       }      /* end (diag == 0.0) */
2397     } else { /* (sizes[i] != bs) */
2398       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2399       for (k = 0; k < count; k++) {
2400         aa[0] = zero;
2401         aa += bs;
2402       }
2403       if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES));
2404     }
2405   }
2406 
2407   PetscCall(PetscFree2(rows, sizes));
2408   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2409   PetscFunctionReturn(PETSC_SUCCESS);
2410 }
2411 
2412 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2413 {
2414   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2415   PetscInt           i, j, k, count;
2416   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
2417   PetscScalar        zero = 0.0;
2418   MatScalar         *aa;
2419   const PetscScalar *xx;
2420   PetscScalar       *bb;
2421   PetscBool         *zeroed, vecs = PETSC_FALSE;
2422 
2423   PetscFunctionBegin;
2424   /* fix right hand side if needed */
2425   if (x && b) {
2426     PetscCall(VecGetArrayRead(x, &xx));
2427     PetscCall(VecGetArray(b, &bb));
2428     vecs = PETSC_TRUE;
2429   }
2430 
2431   /* zero the columns */
2432   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
2433   for (i = 0; i < is_n; i++) {
2434     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
2435     zeroed[is_idx[i]] = PETSC_TRUE;
2436   }
2437   for (i = 0; i < A->rmap->N; i++) {
2438     if (!zeroed[i]) {
2439       row = i / bs;
2440       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
2441         for (k = 0; k < bs; k++) {
2442           col = bs * baij->j[j] + k;
2443           if (zeroed[col]) {
2444             aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
2445             if (vecs) bb[i] -= aa[0] * xx[col];
2446             aa[0] = 0.0;
2447           }
2448         }
2449       }
2450     } else if (vecs) bb[i] = diag * xx[i];
2451   }
2452   PetscCall(PetscFree(zeroed));
2453   if (vecs) {
2454     PetscCall(VecRestoreArrayRead(x, &xx));
2455     PetscCall(VecRestoreArray(b, &bb));
2456   }
2457 
2458   /* zero the rows */
2459   for (i = 0; i < is_n; i++) {
2460     row   = is_idx[i];
2461     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2462     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
2463     for (k = 0; k < count; k++) {
2464       aa[0] = zero;
2465       aa += bs;
2466     }
2467     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
2468   }
2469   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2470   PetscFunctionReturn(PETSC_SUCCESS);
2471 }
2472 
2473 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2474 {
2475   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2476   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2477   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2478   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2479   PetscInt     ridx, cidx, bs2                 = a->bs2;
2480   PetscBool    roworiented = a->roworiented;
2481   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
2482 
2483   PetscFunctionBegin;
2484   for (k = 0; k < m; k++) { /* loop over added rows */
2485     row  = im[k];
2486     brow = row / bs;
2487     if (row < 0) continue;
2488     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
2489     rp = aj + ai[brow];
2490     if (!A->structure_only) ap = aa + bs2 * ai[brow];
2491     rmax = imax[brow];
2492     nrow = ailen[brow];
2493     low  = 0;
2494     high = nrow;
2495     for (l = 0; l < n; l++) { /* loop over added columns */
2496       if (in[l] < 0) continue;
2497       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
2498       col  = in[l];
2499       bcol = col / bs;
2500       ridx = row % bs;
2501       cidx = col % bs;
2502       if (!A->structure_only) {
2503         if (roworiented) {
2504           value = v[l + k * n];
2505         } else {
2506           value = v[k + l * m];
2507         }
2508       }
2509       if (col <= lastcol) low = 0;
2510       else high = nrow;
2511       lastcol = col;
2512       while (high - low > 7) {
2513         t = (low + high) / 2;
2514         if (rp[t] > bcol) high = t;
2515         else low = t;
2516       }
2517       for (i = low; i < high; i++) {
2518         if (rp[i] > bcol) break;
2519         if (rp[i] == bcol) {
2520           bap = ap + bs2 * i + bs * cidx + ridx;
2521           if (!A->structure_only) {
2522             if (is == ADD_VALUES) *bap += value;
2523             else *bap = value;
2524           }
2525           goto noinsert1;
2526         }
2527       }
2528       if (nonew == 1) goto noinsert1;
2529       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2530       if (A->structure_only) {
2531         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2532       } else {
2533         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2534       }
2535       N = nrow++ - 1;
2536       high++;
2537       /* shift up all the later entries in this row */
2538       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2539       rp[i] = bcol;
2540       if (!A->structure_only) {
2541         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2542         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2543         ap[bs2 * i + bs * cidx + ridx] = value;
2544       }
2545       a->nz++;
2546       A->nonzerostate++;
2547     noinsert1:;
2548       low = i;
2549     }
2550     ailen[brow] = nrow;
2551   }
2552   PetscFunctionReturn(PETSC_SUCCESS);
2553 }
2554 
2555 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2556 {
2557   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
2558   Mat          outA;
2559   PetscBool    row_identity, col_identity;
2560 
2561   PetscFunctionBegin;
2562   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
2563   PetscCall(ISIdentity(row, &row_identity));
2564   PetscCall(ISIdentity(col, &col_identity));
2565   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
2566 
2567   outA            = inA;
2568   inA->factortype = MAT_FACTOR_LU;
2569   PetscCall(PetscFree(inA->solvertype));
2570   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
2571 
2572   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2573 
2574   PetscCall(PetscObjectReference((PetscObject)row));
2575   PetscCall(ISDestroy(&a->row));
2576   a->row = row;
2577   PetscCall(PetscObjectReference((PetscObject)col));
2578   PetscCall(ISDestroy(&a->col));
2579   a->col = col;
2580 
2581   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2582   PetscCall(ISDestroy(&a->icol));
2583   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2584 
2585   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2586   if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
2587   PetscCall(MatLUFactorNumeric(outA, inA, info));
2588   PetscFunctionReturn(PETSC_SUCCESS);
2589 }
2590 
2591 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2592 {
2593   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2594 
2595   PetscFunctionBegin;
2596   baij->nz = baij->maxnz;
2597   PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2598   PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
2599   PetscFunctionReturn(PETSC_SUCCESS);
2600 }
2601 
2602 /*@
2603     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix.
2604 
2605   Input Parameters:
2606 +  mat - the `MATSEQBAIJ` matrix
2607 -  indices - the column indices
2608 
2609   Level: advanced
2610 
2611   Notes:
2612     This can be called if you have precomputed the nonzero structure of the
2613   matrix and want to provide it to the matrix object to improve the performance
2614   of the `MatSetValues()` operation.
2615 
2616     You MUST have set the correct numbers of nonzeros per row in the call to
2617   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
2618 
2619     MUST be called before any calls to `MatSetValues()`
2620 
2621 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()`
2622 @*/
2623 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2624 {
2625   PetscFunctionBegin;
2626   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2627   PetscValidIntPointer(indices, 2);
2628   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices));
2629   PetscFunctionReturn(PETSC_SUCCESS);
2630 }
2631 
2632 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2633 {
2634   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2635   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2636   PetscReal    atmp;
2637   PetscScalar *x, zero = 0.0;
2638   MatScalar   *aa;
2639   PetscInt     ncols, brow, krow, kcol;
2640 
2641   PetscFunctionBegin;
2642   /* why is this not a macro???????????????????????????????????????????????????????????????? */
2643   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2644   bs  = A->rmap->bs;
2645   aa  = a->a;
2646   ai  = a->i;
2647   aj  = a->j;
2648   mbs = a->mbs;
2649 
2650   PetscCall(VecSet(v, zero));
2651   PetscCall(VecGetArray(v, &x));
2652   PetscCall(VecGetLocalSize(v, &n));
2653   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2654   for (i = 0; i < mbs; i++) {
2655     ncols = ai[1] - ai[0];
2656     ai++;
2657     brow = bs * i;
2658     for (j = 0; j < ncols; j++) {
2659       for (kcol = 0; kcol < bs; kcol++) {
2660         for (krow = 0; krow < bs; krow++) {
2661           atmp = PetscAbsScalar(*aa);
2662           aa++;
2663           row = brow + krow; /* row index */
2664           if (PetscAbsScalar(x[row]) < atmp) {
2665             x[row] = atmp;
2666             if (idx) idx[row] = bs * (*aj) + kcol;
2667           }
2668         }
2669       }
2670       aj++;
2671     }
2672   }
2673   PetscCall(VecRestoreArray(v, &x));
2674   PetscFunctionReturn(PETSC_SUCCESS);
2675 }
2676 
2677 PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2678 {
2679   PetscFunctionBegin;
2680   /* If the two matrices have the same copy implementation, use fast copy. */
2681   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2682     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
2683     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2684     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
2685 
2686     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
2687     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
2688     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
2689     PetscCall(PetscObjectStateIncrease((PetscObject)B));
2690   } else {
2691     PetscCall(MatCopy_Basic(A, B, str));
2692   }
2693   PetscFunctionReturn(PETSC_SUCCESS);
2694 }
2695 
2696 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2697 {
2698   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2699 
2700   PetscFunctionBegin;
2701   *array = a->a;
2702   PetscFunctionReturn(PETSC_SUCCESS);
2703 }
2704 
2705 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2706 {
2707   PetscFunctionBegin;
2708   *array = NULL;
2709   PetscFunctionReturn(PETSC_SUCCESS);
2710 }
2711 
2712 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2713 {
2714   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
2715   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
2716   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
2717 
2718   PetscFunctionBegin;
2719   /* Set the number of nonzeros in the new matrix */
2720   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
2721   PetscFunctionReturn(PETSC_SUCCESS);
2722 }
2723 
2724 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2725 {
2726   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
2727   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2728   PetscBLASInt one = 1;
2729 
2730   PetscFunctionBegin;
2731   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2732     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2733     if (e) {
2734       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2735       if (e) {
2736         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2737         if (e) str = SAME_NONZERO_PATTERN;
2738       }
2739     }
2740     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2741   }
2742   if (str == SAME_NONZERO_PATTERN) {
2743     PetscScalar  alpha = a;
2744     PetscBLASInt bnz;
2745     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2746     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
2747     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2748   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2749     PetscCall(MatAXPY_Basic(Y, a, X, str));
2750   } else {
2751     Mat       B;
2752     PetscInt *nnz;
2753     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
2754     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
2755     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2756     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2757     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
2758     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
2759     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
2760     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
2761     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
2762     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2763     PetscCall(MatHeaderMerge(Y, &B));
2764     PetscCall(PetscFree(nnz));
2765   }
2766   PetscFunctionReturn(PETSC_SUCCESS);
2767 }
2768 
2769 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2770 {
2771 #if PetscDefined(USE_COMPLEX)
2772   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2773   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2774   MatScalar   *aa = a->a;
2775 
2776   PetscFunctionBegin;
2777   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
2778   PetscFunctionReturn(PETSC_SUCCESS);
2779 #else
2780   (void)A;
2781   return PETSC_SUCCESS;
2782 #endif
2783 }
2784 
2785 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2786 {
2787 #if PetscDefined(USE_COMPLEX)
2788   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2789   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2790   MatScalar   *aa = a->a;
2791 
2792   PetscFunctionBegin;
2793   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
2794   PetscFunctionReturn(PETSC_SUCCESS);
2795 #else
2796   (void)A;
2797   return PETSC_SUCCESS;
2798 #endif
2799 }
2800 
2801 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2802 {
2803 #if PetscDefined(USE_COMPLEX)
2804   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2805   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2806   MatScalar   *aa = a->a;
2807 
2808   PetscFunctionBegin;
2809   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2810   PetscFunctionReturn(PETSC_SUCCESS);
2811 #else
2812   (void)A;
2813   return PETSC_SUCCESS;
2814 #endif
2815 }
2816 
2817 /*
2818     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2819 */
2820 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2821 {
2822   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
2823   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
2824   PetscInt     nz = a->i[m], row, *jj, mr, col;
2825 
2826   PetscFunctionBegin;
2827   *nn = n;
2828   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2829   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
2830   PetscCall(PetscCalloc1(n, &collengths));
2831   PetscCall(PetscMalloc1(n + 1, &cia));
2832   PetscCall(PetscMalloc1(nz, &cja));
2833   jj = a->j;
2834   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2835   cia[0] = oshift;
2836   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2837   PetscCall(PetscArrayzero(collengths, n));
2838   jj = a->j;
2839   for (row = 0; row < m; row++) {
2840     mr = a->i[row + 1] - a->i[row];
2841     for (i = 0; i < mr; i++) {
2842       col = *jj++;
2843 
2844       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2845     }
2846   }
2847   PetscCall(PetscFree(collengths));
2848   *ia = cia;
2849   *ja = cja;
2850   PetscFunctionReturn(PETSC_SUCCESS);
2851 }
2852 
2853 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2854 {
2855   PetscFunctionBegin;
2856   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2857   PetscCall(PetscFree(*ia));
2858   PetscCall(PetscFree(*ja));
2859   PetscFunctionReturn(PETSC_SUCCESS);
2860 }
2861 
2862 /*
2863  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2864  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2865  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2866  */
2867 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2868 {
2869   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2870   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2871   PetscInt     nz = a->i[m], row, *jj, mr, col;
2872   PetscInt    *cspidx;
2873 
2874   PetscFunctionBegin;
2875   *nn = n;
2876   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2877 
2878   PetscCall(PetscCalloc1(n, &collengths));
2879   PetscCall(PetscMalloc1(n + 1, &cia));
2880   PetscCall(PetscMalloc1(nz, &cja));
2881   PetscCall(PetscMalloc1(nz, &cspidx));
2882   jj = a->j;
2883   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2884   cia[0] = oshift;
2885   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2886   PetscCall(PetscArrayzero(collengths, n));
2887   jj = a->j;
2888   for (row = 0; row < m; row++) {
2889     mr = a->i[row + 1] - a->i[row];
2890     for (i = 0; i < mr; i++) {
2891       col                                         = *jj++;
2892       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2893       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2894     }
2895   }
2896   PetscCall(PetscFree(collengths));
2897   *ia    = cia;
2898   *ja    = cja;
2899   *spidx = cspidx;
2900   PetscFunctionReturn(PETSC_SUCCESS);
2901 }
2902 
2903 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2904 {
2905   PetscFunctionBegin;
2906   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
2907   PetscCall(PetscFree(*spidx));
2908   PetscFunctionReturn(PETSC_SUCCESS);
2909 }
2910 
2911 PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2912 {
2913   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
2914 
2915   PetscFunctionBegin;
2916   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
2917   PetscCall(MatShift_Basic(Y, a));
2918   PetscFunctionReturn(PETSC_SUCCESS);
2919 }
2920 
2921 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2922                                        MatGetRow_SeqBAIJ,
2923                                        MatRestoreRow_SeqBAIJ,
2924                                        MatMult_SeqBAIJ_N,
2925                                        /* 4*/ MatMultAdd_SeqBAIJ_N,
2926                                        MatMultTranspose_SeqBAIJ,
2927                                        MatMultTransposeAdd_SeqBAIJ,
2928                                        NULL,
2929                                        NULL,
2930                                        NULL,
2931                                        /* 10*/ NULL,
2932                                        MatLUFactor_SeqBAIJ,
2933                                        NULL,
2934                                        NULL,
2935                                        MatTranspose_SeqBAIJ,
2936                                        /* 15*/ MatGetInfo_SeqBAIJ,
2937                                        MatEqual_SeqBAIJ,
2938                                        MatGetDiagonal_SeqBAIJ,
2939                                        MatDiagonalScale_SeqBAIJ,
2940                                        MatNorm_SeqBAIJ,
2941                                        /* 20*/ NULL,
2942                                        MatAssemblyEnd_SeqBAIJ,
2943                                        MatSetOption_SeqBAIJ,
2944                                        MatZeroEntries_SeqBAIJ,
2945                                        /* 24*/ MatZeroRows_SeqBAIJ,
2946                                        NULL,
2947                                        NULL,
2948                                        NULL,
2949                                        NULL,
2950                                        /* 29*/ MatSetUp_Seq_Hash,
2951                                        NULL,
2952                                        NULL,
2953                                        NULL,
2954                                        NULL,
2955                                        /* 34*/ MatDuplicate_SeqBAIJ,
2956                                        NULL,
2957                                        NULL,
2958                                        MatILUFactor_SeqBAIJ,
2959                                        NULL,
2960                                        /* 39*/ MatAXPY_SeqBAIJ,
2961                                        MatCreateSubMatrices_SeqBAIJ,
2962                                        MatIncreaseOverlap_SeqBAIJ,
2963                                        MatGetValues_SeqBAIJ,
2964                                        MatCopy_SeqBAIJ,
2965                                        /* 44*/ NULL,
2966                                        MatScale_SeqBAIJ,
2967                                        MatShift_SeqBAIJ,
2968                                        NULL,
2969                                        MatZeroRowsColumns_SeqBAIJ,
2970                                        /* 49*/ NULL,
2971                                        MatGetRowIJ_SeqBAIJ,
2972                                        MatRestoreRowIJ_SeqBAIJ,
2973                                        MatGetColumnIJ_SeqBAIJ,
2974                                        MatRestoreColumnIJ_SeqBAIJ,
2975                                        /* 54*/ MatFDColoringCreate_SeqXAIJ,
2976                                        NULL,
2977                                        NULL,
2978                                        NULL,
2979                                        MatSetValuesBlocked_SeqBAIJ,
2980                                        /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2981                                        MatDestroy_SeqBAIJ,
2982                                        MatView_SeqBAIJ,
2983                                        NULL,
2984                                        NULL,
2985                                        /* 64*/ NULL,
2986                                        NULL,
2987                                        NULL,
2988                                        NULL,
2989                                        NULL,
2990                                        /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2991                                        NULL,
2992                                        MatConvert_Basic,
2993                                        NULL,
2994                                        NULL,
2995                                        /* 74*/ NULL,
2996                                        MatFDColoringApply_BAIJ,
2997                                        NULL,
2998                                        NULL,
2999                                        NULL,
3000                                        /* 79*/ NULL,
3001                                        NULL,
3002                                        NULL,
3003                                        NULL,
3004                                        MatLoad_SeqBAIJ,
3005                                        /* 84*/ NULL,
3006                                        NULL,
3007                                        NULL,
3008                                        NULL,
3009                                        NULL,
3010                                        /* 89*/ NULL,
3011                                        NULL,
3012                                        NULL,
3013                                        NULL,
3014                                        NULL,
3015                                        /* 94*/ NULL,
3016                                        NULL,
3017                                        NULL,
3018                                        NULL,
3019                                        NULL,
3020                                        /* 99*/ NULL,
3021                                        NULL,
3022                                        NULL,
3023                                        MatConjugate_SeqBAIJ,
3024                                        NULL,
3025                                        /*104*/ NULL,
3026                                        MatRealPart_SeqBAIJ,
3027                                        MatImaginaryPart_SeqBAIJ,
3028                                        NULL,
3029                                        NULL,
3030                                        /*109*/ NULL,
3031                                        NULL,
3032                                        NULL,
3033                                        NULL,
3034                                        MatMissingDiagonal_SeqBAIJ,
3035                                        /*114*/ NULL,
3036                                        NULL,
3037                                        NULL,
3038                                        NULL,
3039                                        NULL,
3040                                        /*119*/ NULL,
3041                                        NULL,
3042                                        MatMultHermitianTranspose_SeqBAIJ,
3043                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3044                                        NULL,
3045                                        /*124*/ NULL,
3046                                        MatGetColumnReductions_SeqBAIJ,
3047                                        MatInvertBlockDiagonal_SeqBAIJ,
3048                                        NULL,
3049                                        NULL,
3050                                        /*129*/ NULL,
3051                                        NULL,
3052                                        NULL,
3053                                        NULL,
3054                                        NULL,
3055                                        /*134*/ NULL,
3056                                        NULL,
3057                                        NULL,
3058                                        NULL,
3059                                        NULL,
3060                                        /*139*/ MatSetBlockSizes_Default,
3061                                        NULL,
3062                                        NULL,
3063                                        MatFDColoringSetUp_SeqXAIJ,
3064                                        NULL,
3065                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3066                                        MatDestroySubMatrices_SeqBAIJ,
3067                                        NULL,
3068                                        NULL,
3069                                        NULL,
3070                                        NULL,
3071                                        /*150*/ NULL,
3072                                        NULL};
3073 
3074 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3075 {
3076   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3077   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3078 
3079   PetscFunctionBegin;
3080   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3081 
3082   /* allocate space for values if not already there */
3083   if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
3084 
3085   /* copy values over */
3086   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
3087   PetscFunctionReturn(PETSC_SUCCESS);
3088 }
3089 
3090 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3091 {
3092   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3093   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3094 
3095   PetscFunctionBegin;
3096   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3097   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
3098 
3099   /* copy values over */
3100   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
3101   PetscFunctionReturn(PETSC_SUCCESS);
3102 }
3103 
3104 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3105 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3106 
3107 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz)
3108 {
3109   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
3110   PetscInt     i, mbs, nbs, bs2;
3111   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3112 
3113   PetscFunctionBegin;
3114   if (B->hash_active) {
3115     PetscInt bs;
3116     B->ops[0] = b->cops;
3117     PetscCall(PetscHMapIJVDestroy(&b->ht));
3118     PetscCall(MatGetBlockSize(B, &bs));
3119     if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht));
3120     PetscCall(PetscFree(b->dnz));
3121     PetscCall(PetscFree(b->bdnz));
3122     B->hash_active = PETSC_FALSE;
3123   }
3124   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3125   if (nz == MAT_SKIP_ALLOCATION) {
3126     skipallocation = PETSC_TRUE;
3127     nz             = 0;
3128   }
3129 
3130   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
3131   PetscCall(PetscLayoutSetUp(B->rmap));
3132   PetscCall(PetscLayoutSetUp(B->cmap));
3133   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3134 
3135   B->preallocated = PETSC_TRUE;
3136 
3137   mbs = B->rmap->n / bs;
3138   nbs = B->cmap->n / bs;
3139   bs2 = bs * bs;
3140 
3141   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3142 
3143   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3144   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3145   if (nnz) {
3146     for (i = 0; i < mbs; i++) {
3147       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
3148       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3149     }
3150   }
3151 
3152   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
3153   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3154   PetscOptionsEnd();
3155 
3156   if (!flg) {
3157     switch (bs) {
3158     case 1:
3159       B->ops->mult    = MatMult_SeqBAIJ_1;
3160       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3161       break;
3162     case 2:
3163       B->ops->mult    = MatMult_SeqBAIJ_2;
3164       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3165       break;
3166     case 3:
3167       B->ops->mult    = MatMult_SeqBAIJ_3;
3168       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3169       break;
3170     case 4:
3171       B->ops->mult    = MatMult_SeqBAIJ_4;
3172       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3173       break;
3174     case 5:
3175       B->ops->mult    = MatMult_SeqBAIJ_5;
3176       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3177       break;
3178     case 6:
3179       B->ops->mult    = MatMult_SeqBAIJ_6;
3180       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3181       break;
3182     case 7:
3183       B->ops->mult    = MatMult_SeqBAIJ_7;
3184       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3185       break;
3186     case 9: {
3187       PetscInt version = 1;
3188       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3189       switch (version) {
3190 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3191       case 1:
3192         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
3193         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3194         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3195         break;
3196 #endif
3197       default:
3198         B->ops->mult    = MatMult_SeqBAIJ_N;
3199         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3200         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3201         break;
3202       }
3203       break;
3204     }
3205     case 11:
3206       B->ops->mult    = MatMult_SeqBAIJ_11;
3207       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3208       break;
3209     case 12: {
3210       PetscInt version = 1;
3211       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3212       switch (version) {
3213       case 1:
3214         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
3215         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3216         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3217         break;
3218       case 2:
3219         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
3220         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3221         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3222         break;
3223 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3224       case 3:
3225         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
3226         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3227         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3228         break;
3229 #endif
3230       default:
3231         B->ops->mult    = MatMult_SeqBAIJ_N;
3232         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3233         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3234         break;
3235       }
3236       break;
3237     }
3238     case 15: {
3239       PetscInt version = 1;
3240       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3241       switch (version) {
3242       case 1:
3243         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3244         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3245         break;
3246       case 2:
3247         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3248         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3249         break;
3250       case 3:
3251         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3252         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3253         break;
3254       case 4:
3255         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3256         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3257         break;
3258       default:
3259         B->ops->mult = MatMult_SeqBAIJ_N;
3260         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3261         break;
3262       }
3263       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3264       break;
3265     }
3266     default:
3267       B->ops->mult    = MatMult_SeqBAIJ_N;
3268       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3269       PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3270       break;
3271     }
3272   }
3273   B->ops->sor = MatSOR_SeqBAIJ;
3274   b->mbs      = mbs;
3275   b->nbs      = nbs;
3276   if (!skipallocation) {
3277     if (!b->imax) {
3278       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
3279 
3280       b->free_imax_ilen = PETSC_TRUE;
3281     }
3282     /* b->ilen will count nonzeros in each block row so far. */
3283     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3284     if (!nnz) {
3285       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3286       else if (nz < 0) nz = 1;
3287       nz = PetscMin(nz, nbs);
3288       for (i = 0; i < mbs; i++) b->imax[i] = nz;
3289       PetscCall(PetscIntMultError(nz, mbs, &nz));
3290     } else {
3291       PetscInt64 nz64 = 0;
3292       for (i = 0; i < mbs; i++) {
3293         b->imax[i] = nnz[i];
3294         nz64 += nnz[i];
3295       }
3296       PetscCall(PetscIntCast(nz64, &nz));
3297     }
3298 
3299     /* allocate the matrix space */
3300     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3301     if (B->structure_only) {
3302       PetscCall(PetscMalloc1(nz, &b->j));
3303       PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i));
3304     } else {
3305       PetscInt nzbs2 = 0;
3306       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
3307       PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i));
3308       PetscCall(PetscArrayzero(b->a, nz * bs2));
3309     }
3310     PetscCall(PetscArrayzero(b->j, nz));
3311 
3312     if (B->structure_only) {
3313       b->singlemalloc = PETSC_FALSE;
3314       b->free_a       = PETSC_FALSE;
3315     } else {
3316       b->singlemalloc = PETSC_TRUE;
3317       b->free_a       = PETSC_TRUE;
3318     }
3319     b->free_ij = PETSC_TRUE;
3320 
3321     b->i[0] = 0;
3322     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3323 
3324   } else {
3325     b->free_a  = PETSC_FALSE;
3326     b->free_ij = PETSC_FALSE;
3327   }
3328 
3329   b->bs2              = bs2;
3330   b->mbs              = mbs;
3331   b->nz               = 0;
3332   b->maxnz            = nz;
3333   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3334   B->was_assembled    = PETSC_FALSE;
3335   B->assembled        = PETSC_FALSE;
3336   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3337   PetscFunctionReturn(PETSC_SUCCESS);
3338 }
3339 
3340 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3341 {
3342   PetscInt     i, m, nz, nz_max = 0, *nnz;
3343   PetscScalar *values      = NULL;
3344   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3345 
3346   PetscFunctionBegin;
3347   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
3348   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
3349   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
3350   PetscCall(PetscLayoutSetUp(B->rmap));
3351   PetscCall(PetscLayoutSetUp(B->cmap));
3352   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3353   m = B->rmap->n / bs;
3354 
3355   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
3356   PetscCall(PetscMalloc1(m + 1, &nnz));
3357   for (i = 0; i < m; i++) {
3358     nz = ii[i + 1] - ii[i];
3359     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3360     nz_max = PetscMax(nz_max, nz);
3361     nnz[i] = nz;
3362   }
3363   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
3364   PetscCall(PetscFree(nnz));
3365 
3366   values = (PetscScalar *)V;
3367   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3368   for (i = 0; i < m; i++) {
3369     PetscInt        ncols = ii[i + 1] - ii[i];
3370     const PetscInt *icols = jj + ii[i];
3371     if (bs == 1 || !roworiented) {
3372       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
3373       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
3374     } else {
3375       PetscInt j;
3376       for (j = 0; j < ncols; j++) {
3377         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
3378         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
3379       }
3380     }
3381   }
3382   if (!V) PetscCall(PetscFree(values));
3383   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3384   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3385   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3386   PetscFunctionReturn(PETSC_SUCCESS);
3387 }
3388 
3389 /*@C
3390    MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3391 
3392    Not Collective
3393 
3394    Input Parameter:
3395 .  mat - a `MATSEQBAIJ` matrix
3396 
3397    Output Parameter:
3398 .   array - pointer to the data
3399 
3400    Level: intermediate
3401 
3402 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3403 @*/
3404 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array)
3405 {
3406   PetscFunctionBegin;
3407   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
3408   PetscFunctionReturn(PETSC_SUCCESS);
3409 }
3410 
3411 /*@C
3412    MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3413 
3414    Not Collective
3415 
3416    Input Parameters:
3417 +  mat - a `MATSEQBAIJ` matrix
3418 -  array - pointer to the data
3419 
3420    Level: intermediate
3421 
3422 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3423 @*/
3424 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array)
3425 {
3426   PetscFunctionBegin;
3427   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
3428   PetscFunctionReturn(PETSC_SUCCESS);
3429 }
3430 
3431 /*MC
3432    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3433    block sparse compressed row format.
3434 
3435    Options Database Keys:
3436 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()`
3437 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3438 
3439    Level: beginner
3440 
3441    Notes:
3442     `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
3443     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
3444 
3445    Run with `-info` to see what version of the matrix-vector product is being used
3446 
3447 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()`
3448 M*/
3449 
3450 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3451 
3452 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3453 {
3454   PetscMPIInt  size;
3455   Mat_SeqBAIJ *b;
3456 
3457   PetscFunctionBegin;
3458   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
3459   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3460 
3461   PetscCall(PetscNew(&b));
3462   B->data   = (void *)b;
3463   B->ops[0] = MatOps_Values;
3464 
3465   b->row          = NULL;
3466   b->col          = NULL;
3467   b->icol         = NULL;
3468   b->reallocs     = 0;
3469   b->saved_values = NULL;
3470 
3471   b->roworiented        = PETSC_TRUE;
3472   b->nonew              = 0;
3473   b->diag               = NULL;
3474   B->spptr              = NULL;
3475   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3476   b->keepnonzeropattern = PETSC_FALSE;
3477 
3478   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
3479   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
3480   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
3481   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
3482   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
3483   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
3484   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
3485   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
3486   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3487   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
3488 #if defined(PETSC_HAVE_HYPRE)
3489   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
3490 #endif
3491   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
3492   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
3493   PetscFunctionReturn(PETSC_SUCCESS);
3494 }
3495 
3496 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3497 {
3498   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3499   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3500 
3501   PetscFunctionBegin;
3502   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
3503   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
3504 
3505   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3506     c->imax           = a->imax;
3507     c->ilen           = a->ilen;
3508     c->free_imax_ilen = PETSC_FALSE;
3509   } else {
3510     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3511     for (i = 0; i < mbs; i++) {
3512       c->imax[i] = a->imax[i];
3513       c->ilen[i] = a->ilen[i];
3514     }
3515     c->free_imax_ilen = PETSC_TRUE;
3516   }
3517 
3518   /* allocate the matrix space */
3519   if (mallocmatspace) {
3520     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3521       PetscCall(PetscCalloc1(bs2 * nz, &c->a));
3522 
3523       c->i            = a->i;
3524       c->j            = a->j;
3525       c->singlemalloc = PETSC_FALSE;
3526       c->free_a       = PETSC_TRUE;
3527       c->free_ij      = PETSC_FALSE;
3528       c->parent       = A;
3529       C->preallocated = PETSC_TRUE;
3530       C->assembled    = PETSC_TRUE;
3531 
3532       PetscCall(PetscObjectReference((PetscObject)A));
3533       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3534       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3535     } else {
3536       PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i));
3537 
3538       c->singlemalloc = PETSC_TRUE;
3539       c->free_a       = PETSC_TRUE;
3540       c->free_ij      = PETSC_TRUE;
3541 
3542       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3543       if (mbs > 0) {
3544         PetscCall(PetscArraycpy(c->j, a->j, nz));
3545         if (cpvalues == MAT_COPY_VALUES) {
3546           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
3547         } else {
3548           PetscCall(PetscArrayzero(c->a, bs2 * nz));
3549         }
3550       }
3551       C->preallocated = PETSC_TRUE;
3552       C->assembled    = PETSC_TRUE;
3553     }
3554   }
3555 
3556   c->roworiented = a->roworiented;
3557   c->nonew       = a->nonew;
3558 
3559   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
3560   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
3561 
3562   c->bs2 = a->bs2;
3563   c->mbs = a->mbs;
3564   c->nbs = a->nbs;
3565 
3566   if (a->diag) {
3567     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3568       c->diag      = a->diag;
3569       c->free_diag = PETSC_FALSE;
3570     } else {
3571       PetscCall(PetscMalloc1(mbs + 1, &c->diag));
3572       for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i];
3573       c->free_diag = PETSC_TRUE;
3574     }
3575   } else c->diag = NULL;
3576 
3577   c->nz         = a->nz;
3578   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3579   c->solve_work = NULL;
3580   c->mult_work  = NULL;
3581   c->sor_workt  = NULL;
3582   c->sor_work   = NULL;
3583 
3584   c->compressedrow.use   = a->compressedrow.use;
3585   c->compressedrow.nrows = a->compressedrow.nrows;
3586   if (a->compressedrow.use) {
3587     i = a->compressedrow.nrows;
3588     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
3589     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
3590     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
3591   } else {
3592     c->compressedrow.use    = PETSC_FALSE;
3593     c->compressedrow.i      = NULL;
3594     c->compressedrow.rindex = NULL;
3595   }
3596   C->nonzerostate = A->nonzerostate;
3597 
3598   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
3599   PetscFunctionReturn(PETSC_SUCCESS);
3600 }
3601 
3602 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3603 {
3604   PetscFunctionBegin;
3605   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
3606   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
3607   PetscCall(MatSetType(*B, MATSEQBAIJ));
3608   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
3609   PetscFunctionReturn(PETSC_SUCCESS);
3610 }
3611 
3612 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3613 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3614 {
3615   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3616   PetscInt    *rowidxs, *colidxs;
3617   PetscScalar *matvals;
3618 
3619   PetscFunctionBegin;
3620   PetscCall(PetscViewerSetUp(viewer));
3621 
3622   /* read matrix header */
3623   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3624   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3625   M  = header[1];
3626   N  = header[2];
3627   nz = header[3];
3628   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3629   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3630   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3631 
3632   /* set block sizes from the viewer's .info file */
3633   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3634   /* set local and global sizes if not set already */
3635   if (mat->rmap->n < 0) mat->rmap->n = M;
3636   if (mat->cmap->n < 0) mat->cmap->n = N;
3637   if (mat->rmap->N < 0) mat->rmap->N = M;
3638   if (mat->cmap->N < 0) mat->cmap->N = N;
3639   PetscCall(PetscLayoutSetUp(mat->rmap));
3640   PetscCall(PetscLayoutSetUp(mat->cmap));
3641 
3642   /* check if the matrix sizes are correct */
3643   PetscCall(MatGetSize(mat, &rows, &cols));
3644   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3645   PetscCall(MatGetBlockSize(mat, &bs));
3646   PetscCall(MatGetLocalSize(mat, &m, &n));
3647   mbs = m / bs;
3648   nbs = n / bs;
3649 
3650   /* read in row lengths, column indices and nonzero values */
3651   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3652   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
3653   rowidxs[0] = 0;
3654   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3655   sum = rowidxs[m];
3656   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3657 
3658   /* read in column indices and nonzero values */
3659   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
3660   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
3661   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3662 
3663   {               /* preallocate matrix storage */
3664     PetscBT   bt; /* helper bit set to count nonzeros */
3665     PetscInt *nnz;
3666     PetscBool sbaij;
3667 
3668     PetscCall(PetscBTCreate(nbs, &bt));
3669     PetscCall(PetscCalloc1(mbs, &nnz));
3670     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3671     for (i = 0; i < mbs; i++) {
3672       PetscCall(PetscBTMemzero(nbs, bt));
3673       for (k = 0; k < bs; k++) {
3674         PetscInt row = bs * i + k;
3675         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3676           PetscInt col = colidxs[j];
3677           if (!sbaij || col >= row)
3678             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3679         }
3680       }
3681     }
3682     PetscCall(PetscBTDestroy(&bt));
3683     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
3684     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
3685     PetscCall(PetscFree(nnz));
3686   }
3687 
3688   /* store matrix values */
3689   for (i = 0; i < m; i++) {
3690     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
3691     PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
3692   }
3693 
3694   PetscCall(PetscFree(rowidxs));
3695   PetscCall(PetscFree2(colidxs, matvals));
3696   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3697   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3698   PetscFunctionReturn(PETSC_SUCCESS);
3699 }
3700 
3701 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3702 {
3703   PetscBool isbinary;
3704 
3705   PetscFunctionBegin;
3706   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3707   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3708   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
3709   PetscFunctionReturn(PETSC_SUCCESS);
3710 }
3711 
3712 /*@C
3713    MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3714    compressed row) format.  For good matrix assembly performance the
3715    user should preallocate the matrix storage by setting the parameter `nz`
3716    (or the array `nnz`).
3717 
3718    Collective
3719 
3720    Input Parameters:
3721 +  comm - MPI communicator, set to `PETSC_COMM_SELF`
3722 .  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3723           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3724 .  m - number of rows
3725 .  n - number of columns
3726 .  nz - number of nonzero blocks  per block row (same for all rows)
3727 -  nnz - array containing the number of nonzero blocks in the various block rows
3728          (possibly different for each block row) or `NULL`
3729 
3730    Output Parameter:
3731 .  A - the matrix
3732 
3733    Options Database Keys:
3734 +   -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3735 -   -mat_block_size - size of the blocks to use
3736 
3737    Level: intermediate
3738 
3739    Notes:
3740    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3741    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3742    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
3743 
3744    The number of rows and columns must be divisible by blocksize.
3745 
3746    If the `nnz` parameter is given then the `nz` parameter is ignored
3747 
3748    A nonzero block is any block that as 1 or more nonzeros in it
3749 
3750    The `MATSEQBAIJ` format is fully compatible with standard Fortran
3751    storage.  That is, the stored row and column indices can begin at
3752    either one (as in Fortran) or zero.
3753 
3754    Specify the preallocated storage with either `nz` or `nnz` (not both).
3755    Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3756    allocation.  See [Sparse Matrices](sec_matsparse) for details.
3757    matrices.
3758 
3759 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3760 @*/
3761 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3762 {
3763   PetscFunctionBegin;
3764   PetscCall(MatCreate(comm, A));
3765   PetscCall(MatSetSizes(*A, m, n, m, n));
3766   PetscCall(MatSetType(*A, MATSEQBAIJ));
3767   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
3768   PetscFunctionReturn(PETSC_SUCCESS);
3769 }
3770 
3771 /*@C
3772    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3773    per row in the matrix. For good matrix assembly performance the
3774    user should preallocate the matrix storage by setting the parameter `nz`
3775    (or the array `nnz`).
3776 
3777    Collective
3778 
3779    Input Parameters:
3780 +  B - the matrix
3781 .  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3782           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3783 .  nz - number of block nonzeros per block row (same for all rows)
3784 -  nnz - array containing the number of block nonzeros in the various block rows
3785          (possibly different for each block row) or `NULL`
3786 
3787    Options Database Keys:
3788 +   -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3789 -   -mat_block_size - size of the blocks to use
3790 
3791    Level: intermediate
3792 
3793    Notes:
3794    If the `nnz` parameter is given then the `nz` parameter is ignored
3795 
3796    You can call `MatGetInfo()` to get information on how effective the preallocation was;
3797    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3798    You can also run with the option `-info` and look for messages with the string
3799    malloc in them to see if additional memory allocation was needed.
3800 
3801    The `MATSEQBAIJ` format is fully compatible with standard Fortran
3802    storage.  That is, the stored row and column indices can begin at
3803    either one (as in Fortran) or zero.
3804 
3805    Specify the preallocated storage with either nz or nnz (not both).
3806    Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3807    allocation.  See [Sparse Matrices](sec_matsparse) for details.
3808 
3809 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3810 @*/
3811 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3812 {
3813   PetscFunctionBegin;
3814   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3815   PetscValidType(B, 1);
3816   PetscValidLogicalCollectiveInt(B, bs, 2);
3817   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
3818   PetscFunctionReturn(PETSC_SUCCESS);
3819 }
3820 
3821 /*@C
3822    MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3823 
3824    Collective
3825 
3826    Input Parameters:
3827 +  B - the matrix
3828 .  bs - the blocksize
3829 .  i - the indices into `j` for the start of each local row (starts with zero)
3830 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3831 -  v - optional values in the matrix
3832 
3833    Level: advanced
3834 
3835    Notes:
3836    The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
3837    may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
3838    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3839    `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3840    block column and the second index is over columns within a block.
3841 
3842    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3843 
3844 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3845 @*/
3846 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3847 {
3848   PetscFunctionBegin;
3849   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3850   PetscValidType(B, 1);
3851   PetscValidLogicalCollectiveInt(B, bs, 2);
3852   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
3853   PetscFunctionReturn(PETSC_SUCCESS);
3854 }
3855 
3856 /*@
3857      MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3858 
3859      Collective
3860 
3861    Input Parameters:
3862 +  comm - must be an MPI communicator of size 1
3863 .  bs - size of block
3864 .  m - number of rows
3865 .  n - number of columns
3866 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3867 .  j - column indices
3868 -  a - matrix values
3869 
3870    Output Parameter:
3871 .  mat - the matrix
3872 
3873    Level: advanced
3874 
3875    Notes:
3876        The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
3877     once the matrix is destroyed
3878 
3879        You cannot set new nonzero locations into this matrix, that will generate an error.
3880 
3881        The `i` and `j` indices are 0 based
3882 
3883        When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3884 
3885       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3886       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3887       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3888       with column-major ordering within blocks.
3889 
3890 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3891 @*/
3892 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3893 {
3894   Mat_SeqBAIJ *baij;
3895 
3896   PetscFunctionBegin;
3897   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
3898   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3899 
3900   PetscCall(MatCreate(comm, mat));
3901   PetscCall(MatSetSizes(*mat, m, n, m, n));
3902   PetscCall(MatSetType(*mat, MATSEQBAIJ));
3903   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3904   baij = (Mat_SeqBAIJ *)(*mat)->data;
3905   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3906 
3907   baij->i = i;
3908   baij->j = j;
3909   baij->a = a;
3910 
3911   baij->singlemalloc   = PETSC_FALSE;
3912   baij->nonew          = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3913   baij->free_a         = PETSC_FALSE;
3914   baij->free_ij        = PETSC_FALSE;
3915   baij->free_imax_ilen = PETSC_TRUE;
3916 
3917   for (PetscInt ii = 0; ii < m; ii++) {
3918     const PetscInt row_len = i[ii + 1] - i[ii];
3919 
3920     baij->ilen[ii] = baij->imax[ii] = row_len;
3921     PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
3922   }
3923   if (PetscDefined(USE_DEBUG)) {
3924     for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
3925       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3926       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3927     }
3928   }
3929 
3930   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3931   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3932   PetscFunctionReturn(PETSC_SUCCESS);
3933 }
3934 
3935 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3936 {
3937   PetscFunctionBegin;
3938   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
3939   PetscFunctionReturn(PETSC_SUCCESS);
3940 }
3941