xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 83d0d507e8eaf8d844b49c5dbd0d8d7c8cefa37b)
1 /*
2     Defines the basic matrix operations for the BAIJ (compressed row)
3   matrix storage format.
4 */
5 #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
6 #include <petscblaslapack.h>
7 #include <petsc/private/kernels/blockinvert.h>
8 #include <petsc/private/kernels/blockmatmult.h>
9 
10 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */
11 #define TYPE BAIJ
12 #define TYPE_BS
13 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
14 #undef TYPE_BS
15 #define TYPE_BS _BS
16 #define TYPE_BS_ON
17 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
18 #undef TYPE_BS
19 #include "../src/mat/impls/aij/seq/seqhashmat.h"
20 #undef TYPE
21 #undef TYPE_BS_ON
22 
23 #if defined(PETSC_HAVE_HYPRE)
24 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
25 #endif
26 
27 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
28 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
29 #endif
30 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
31 
32 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
33 {
34   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
35   PetscInt     m, n, ib, jb, bs = A->rmap->bs;
36   MatScalar   *a_val = a_aij->a;
37 
38   PetscFunctionBegin;
39   PetscCall(MatGetSize(A, &m, &n));
40   PetscCall(PetscArrayzero(reductions, n));
41   if (type == NORM_2) {
42     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
43       for (jb = 0; jb < bs; jb++) {
44         for (ib = 0; ib < bs; ib++) {
45           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
46           a_val++;
47         }
48       }
49     }
50   } else if (type == NORM_1) {
51     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
52       for (jb = 0; jb < bs; jb++) {
53         for (ib = 0; ib < bs; ib++) {
54           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
55           a_val++;
56         }
57       }
58     }
59   } else if (type == NORM_INFINITY) {
60     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
61       for (jb = 0; jb < bs; jb++) {
62         for (ib = 0; ib < bs; ib++) {
63           PetscInt col    = A->cmap->rstart + a_aij->j[i] * bs + jb;
64           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
65           a_val++;
66         }
67       }
68     }
69   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
70     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
71       for (jb = 0; jb < bs; jb++) {
72         for (ib = 0; ib < bs; ib++) {
73           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
74           a_val++;
75         }
76       }
77     }
78   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
79     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
80       for (jb = 0; jb < bs; jb++) {
81         for (ib = 0; ib < bs; ib++) {
82           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
83           a_val++;
84         }
85       }
86     }
87   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
88   if (type == NORM_2) {
89     for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
90   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
91     for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
92   }
93   PetscFunctionReturn(PETSC_SUCCESS);
94 }
95 
96 static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
97 {
98   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
99   PetscInt    *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
100   MatScalar   *v     = a->a, *odiag, *diag, work[25], *v_work;
101   PetscReal    shift = 0.0;
102   PetscBool    allowzeropivot, zeropivotdetected = PETSC_FALSE;
103 
104   PetscFunctionBegin;
105   allowzeropivot = PetscNot(A->erroriffailure);
106 
107   if (a->idiagvalid) {
108     if (values) *values = a->idiag;
109     PetscFunctionReturn(PETSC_SUCCESS);
110   }
111   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
112   diag_offset = a->diag;
113   if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); }
114   diag = a->idiag;
115   if (values) *values = a->idiag;
116   /* factor and invert each block */
117   switch (bs) {
118   case 1:
119     for (i = 0; i < mbs; i++) {
120       odiag   = v + 1 * diag_offset[i];
121       diag[0] = odiag[0];
122 
123       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
124         if (allowzeropivot) {
125           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
126           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
127           A->factorerror_zeropivot_row   = i;
128           PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
129         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
130       }
131 
132       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
133       diag += 1;
134     }
135     break;
136   case 2:
137     for (i = 0; i < mbs; i++) {
138       odiag   = v + 4 * diag_offset[i];
139       diag[0] = odiag[0];
140       diag[1] = odiag[1];
141       diag[2] = odiag[2];
142       diag[3] = odiag[3];
143       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
144       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
145       diag += 4;
146     }
147     break;
148   case 3:
149     for (i = 0; i < mbs; i++) {
150       odiag   = v + 9 * diag_offset[i];
151       diag[0] = odiag[0];
152       diag[1] = odiag[1];
153       diag[2] = odiag[2];
154       diag[3] = odiag[3];
155       diag[4] = odiag[4];
156       diag[5] = odiag[5];
157       diag[6] = odiag[6];
158       diag[7] = odiag[7];
159       diag[8] = odiag[8];
160       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
161       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
162       diag += 9;
163     }
164     break;
165   case 4:
166     for (i = 0; i < mbs; i++) {
167       odiag = v + 16 * diag_offset[i];
168       PetscCall(PetscArraycpy(diag, odiag, 16));
169       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
170       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
171       diag += 16;
172     }
173     break;
174   case 5:
175     for (i = 0; i < mbs; i++) {
176       odiag = v + 25 * diag_offset[i];
177       PetscCall(PetscArraycpy(diag, odiag, 25));
178       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
179       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
180       diag += 25;
181     }
182     break;
183   case 6:
184     for (i = 0; i < mbs; i++) {
185       odiag = v + 36 * diag_offset[i];
186       PetscCall(PetscArraycpy(diag, odiag, 36));
187       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
188       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
189       diag += 36;
190     }
191     break;
192   case 7:
193     for (i = 0; i < mbs; i++) {
194       odiag = v + 49 * diag_offset[i];
195       PetscCall(PetscArraycpy(diag, odiag, 49));
196       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
197       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
198       diag += 49;
199     }
200     break;
201   default:
202     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
203     for (i = 0; i < mbs; i++) {
204       odiag = v + bs2 * diag_offset[i];
205       PetscCall(PetscArraycpy(diag, odiag, bs2));
206       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
207       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
208       diag += bs2;
209     }
210     PetscCall(PetscFree2(v_work, v_pivots));
211   }
212   a->idiagvalid = PETSC_TRUE;
213   PetscFunctionReturn(PETSC_SUCCESS);
214 }
215 
216 static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
217 {
218   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
219   PetscScalar       *x, *work, *w, *workt, *t;
220   const MatScalar   *v, *aa = a->a, *idiag;
221   const PetscScalar *b, *xb;
222   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
223   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
224   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
225 
226   PetscFunctionBegin;
227   its = its * lits;
228   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
229   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
230   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
231   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
232   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
233 
234   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
235 
236   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
237   diag  = a->diag;
238   idiag = a->idiag;
239   k     = PetscMax(A->rmap->n, A->cmap->n);
240   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
241   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
242   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
243   work = a->mult_work;
244   t    = a->sor_workt;
245   w    = a->sor_work;
246 
247   PetscCall(VecGetArray(xx, &x));
248   PetscCall(VecGetArrayRead(bb, &b));
249 
250   if (flag & SOR_ZERO_INITIAL_GUESS) {
251     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
252       switch (bs) {
253       case 1:
254         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
255         t[0] = b[0];
256         i2   = 1;
257         idiag += 1;
258         for (i = 1; i < m; i++) {
259           v    = aa + ai[i];
260           vi   = aj + ai[i];
261           nz   = diag[i] - ai[i];
262           s[0] = b[i2];
263           for (j = 0; j < nz; j++) {
264             xw[0] = x[vi[j]];
265             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
266           }
267           t[i2] = s[0];
268           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
269           x[i2] = xw[0];
270           idiag += 1;
271           i2 += 1;
272         }
273         break;
274       case 2:
275         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
276         t[0] = b[0];
277         t[1] = b[1];
278         i2   = 2;
279         idiag += 4;
280         for (i = 1; i < m; i++) {
281           v    = aa + 4 * ai[i];
282           vi   = aj + ai[i];
283           nz   = diag[i] - ai[i];
284           s[0] = b[i2];
285           s[1] = b[i2 + 1];
286           for (j = 0; j < nz; j++) {
287             idx   = 2 * vi[j];
288             it    = 4 * j;
289             xw[0] = x[idx];
290             xw[1] = x[1 + idx];
291             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
292           }
293           t[i2]     = s[0];
294           t[i2 + 1] = s[1];
295           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
296           x[i2]     = xw[0];
297           x[i2 + 1] = xw[1];
298           idiag += 4;
299           i2 += 2;
300         }
301         break;
302       case 3:
303         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
304         t[0] = b[0];
305         t[1] = b[1];
306         t[2] = b[2];
307         i2   = 3;
308         idiag += 9;
309         for (i = 1; i < m; i++) {
310           v    = aa + 9 * ai[i];
311           vi   = aj + ai[i];
312           nz   = diag[i] - ai[i];
313           s[0] = b[i2];
314           s[1] = b[i2 + 1];
315           s[2] = b[i2 + 2];
316           while (nz--) {
317             idx   = 3 * (*vi++);
318             xw[0] = x[idx];
319             xw[1] = x[1 + idx];
320             xw[2] = x[2 + idx];
321             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
322             v += 9;
323           }
324           t[i2]     = s[0];
325           t[i2 + 1] = s[1];
326           t[i2 + 2] = s[2];
327           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
328           x[i2]     = xw[0];
329           x[i2 + 1] = xw[1];
330           x[i2 + 2] = xw[2];
331           idiag += 9;
332           i2 += 3;
333         }
334         break;
335       case 4:
336         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
337         t[0] = b[0];
338         t[1] = b[1];
339         t[2] = b[2];
340         t[3] = b[3];
341         i2   = 4;
342         idiag += 16;
343         for (i = 1; i < m; i++) {
344           v    = aa + 16 * ai[i];
345           vi   = aj + ai[i];
346           nz   = diag[i] - ai[i];
347           s[0] = b[i2];
348           s[1] = b[i2 + 1];
349           s[2] = b[i2 + 2];
350           s[3] = b[i2 + 3];
351           while (nz--) {
352             idx   = 4 * (*vi++);
353             xw[0] = x[idx];
354             xw[1] = x[1 + idx];
355             xw[2] = x[2 + idx];
356             xw[3] = x[3 + idx];
357             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
358             v += 16;
359           }
360           t[i2]     = s[0];
361           t[i2 + 1] = s[1];
362           t[i2 + 2] = s[2];
363           t[i2 + 3] = s[3];
364           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
365           x[i2]     = xw[0];
366           x[i2 + 1] = xw[1];
367           x[i2 + 2] = xw[2];
368           x[i2 + 3] = xw[3];
369           idiag += 16;
370           i2 += 4;
371         }
372         break;
373       case 5:
374         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
375         t[0] = b[0];
376         t[1] = b[1];
377         t[2] = b[2];
378         t[3] = b[3];
379         t[4] = b[4];
380         i2   = 5;
381         idiag += 25;
382         for (i = 1; i < m; i++) {
383           v    = aa + 25 * ai[i];
384           vi   = aj + ai[i];
385           nz   = diag[i] - ai[i];
386           s[0] = b[i2];
387           s[1] = b[i2 + 1];
388           s[2] = b[i2 + 2];
389           s[3] = b[i2 + 3];
390           s[4] = b[i2 + 4];
391           while (nz--) {
392             idx   = 5 * (*vi++);
393             xw[0] = x[idx];
394             xw[1] = x[1 + idx];
395             xw[2] = x[2 + idx];
396             xw[3] = x[3 + idx];
397             xw[4] = x[4 + idx];
398             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
399             v += 25;
400           }
401           t[i2]     = s[0];
402           t[i2 + 1] = s[1];
403           t[i2 + 2] = s[2];
404           t[i2 + 3] = s[3];
405           t[i2 + 4] = s[4];
406           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
407           x[i2]     = xw[0];
408           x[i2 + 1] = xw[1];
409           x[i2 + 2] = xw[2];
410           x[i2 + 3] = xw[3];
411           x[i2 + 4] = xw[4];
412           idiag += 25;
413           i2 += 5;
414         }
415         break;
416       case 6:
417         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
418         t[0] = b[0];
419         t[1] = b[1];
420         t[2] = b[2];
421         t[3] = b[3];
422         t[4] = b[4];
423         t[5] = b[5];
424         i2   = 6;
425         idiag += 36;
426         for (i = 1; i < m; i++) {
427           v    = aa + 36 * ai[i];
428           vi   = aj + ai[i];
429           nz   = diag[i] - ai[i];
430           s[0] = b[i2];
431           s[1] = b[i2 + 1];
432           s[2] = b[i2 + 2];
433           s[3] = b[i2 + 3];
434           s[4] = b[i2 + 4];
435           s[5] = b[i2 + 5];
436           while (nz--) {
437             idx   = 6 * (*vi++);
438             xw[0] = x[idx];
439             xw[1] = x[1 + idx];
440             xw[2] = x[2 + idx];
441             xw[3] = x[3 + idx];
442             xw[4] = x[4 + idx];
443             xw[5] = x[5 + idx];
444             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
445             v += 36;
446           }
447           t[i2]     = s[0];
448           t[i2 + 1] = s[1];
449           t[i2 + 2] = s[2];
450           t[i2 + 3] = s[3];
451           t[i2 + 4] = s[4];
452           t[i2 + 5] = s[5];
453           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
454           x[i2]     = xw[0];
455           x[i2 + 1] = xw[1];
456           x[i2 + 2] = xw[2];
457           x[i2 + 3] = xw[3];
458           x[i2 + 4] = xw[4];
459           x[i2 + 5] = xw[5];
460           idiag += 36;
461           i2 += 6;
462         }
463         break;
464       case 7:
465         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
466         t[0] = b[0];
467         t[1] = b[1];
468         t[2] = b[2];
469         t[3] = b[3];
470         t[4] = b[4];
471         t[5] = b[5];
472         t[6] = b[6];
473         i2   = 7;
474         idiag += 49;
475         for (i = 1; i < m; i++) {
476           v    = aa + 49 * ai[i];
477           vi   = aj + ai[i];
478           nz   = diag[i] - ai[i];
479           s[0] = b[i2];
480           s[1] = b[i2 + 1];
481           s[2] = b[i2 + 2];
482           s[3] = b[i2 + 3];
483           s[4] = b[i2 + 4];
484           s[5] = b[i2 + 5];
485           s[6] = b[i2 + 6];
486           while (nz--) {
487             idx   = 7 * (*vi++);
488             xw[0] = x[idx];
489             xw[1] = x[1 + idx];
490             xw[2] = x[2 + idx];
491             xw[3] = x[3 + idx];
492             xw[4] = x[4 + idx];
493             xw[5] = x[5 + idx];
494             xw[6] = x[6 + idx];
495             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
496             v += 49;
497           }
498           t[i2]     = s[0];
499           t[i2 + 1] = s[1];
500           t[i2 + 2] = s[2];
501           t[i2 + 3] = s[3];
502           t[i2 + 4] = s[4];
503           t[i2 + 5] = s[5];
504           t[i2 + 6] = s[6];
505           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
506           x[i2]     = xw[0];
507           x[i2 + 1] = xw[1];
508           x[i2 + 2] = xw[2];
509           x[i2 + 3] = xw[3];
510           x[i2 + 4] = xw[4];
511           x[i2 + 5] = xw[5];
512           x[i2 + 6] = xw[6];
513           idiag += 49;
514           i2 += 7;
515         }
516         break;
517       default:
518         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
519         PetscCall(PetscArraycpy(t, b, bs));
520         i2 = bs;
521         idiag += bs2;
522         for (i = 1; i < m; i++) {
523           v  = aa + bs2 * ai[i];
524           vi = aj + ai[i];
525           nz = diag[i] - ai[i];
526 
527           PetscCall(PetscArraycpy(w, b + i2, bs));
528           /* copy all rows of x that are needed into contiguous space */
529           workt = work;
530           for (j = 0; j < nz; j++) {
531             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
532             workt += bs;
533           }
534           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
535           PetscCall(PetscArraycpy(t + i2, w, bs));
536           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
537 
538           idiag += bs2;
539           i2 += bs;
540         }
541         break;
542       }
543       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
544       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
545       xb = t;
546     } else xb = b;
547     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
548       idiag = a->idiag + bs2 * (a->mbs - 1);
549       i2    = bs * (m - 1);
550       switch (bs) {
551       case 1:
552         s[0] = xb[i2];
553         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
554         x[i2] = xw[0];
555         i2 -= 1;
556         for (i = m - 2; i >= 0; i--) {
557           v    = aa + (diag[i] + 1);
558           vi   = aj + diag[i] + 1;
559           nz   = ai[i + 1] - diag[i] - 1;
560           s[0] = xb[i2];
561           for (j = 0; j < nz; j++) {
562             xw[0] = x[vi[j]];
563             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
564           }
565           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
566           x[i2] = xw[0];
567           idiag -= 1;
568           i2 -= 1;
569         }
570         break;
571       case 2:
572         s[0] = xb[i2];
573         s[1] = xb[i2 + 1];
574         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
575         x[i2]     = xw[0];
576         x[i2 + 1] = xw[1];
577         i2 -= 2;
578         idiag -= 4;
579         for (i = m - 2; i >= 0; i--) {
580           v    = aa + 4 * (diag[i] + 1);
581           vi   = aj + diag[i] + 1;
582           nz   = ai[i + 1] - diag[i] - 1;
583           s[0] = xb[i2];
584           s[1] = xb[i2 + 1];
585           for (j = 0; j < nz; j++) {
586             idx   = 2 * vi[j];
587             it    = 4 * j;
588             xw[0] = x[idx];
589             xw[1] = x[1 + idx];
590             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
591           }
592           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
593           x[i2]     = xw[0];
594           x[i2 + 1] = xw[1];
595           idiag -= 4;
596           i2 -= 2;
597         }
598         break;
599       case 3:
600         s[0] = xb[i2];
601         s[1] = xb[i2 + 1];
602         s[2] = xb[i2 + 2];
603         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
604         x[i2]     = xw[0];
605         x[i2 + 1] = xw[1];
606         x[i2 + 2] = xw[2];
607         i2 -= 3;
608         idiag -= 9;
609         for (i = m - 2; i >= 0; i--) {
610           v    = aa + 9 * (diag[i] + 1);
611           vi   = aj + diag[i] + 1;
612           nz   = ai[i + 1] - diag[i] - 1;
613           s[0] = xb[i2];
614           s[1] = xb[i2 + 1];
615           s[2] = xb[i2 + 2];
616           while (nz--) {
617             idx   = 3 * (*vi++);
618             xw[0] = x[idx];
619             xw[1] = x[1 + idx];
620             xw[2] = x[2 + idx];
621             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
622             v += 9;
623           }
624           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
625           x[i2]     = xw[0];
626           x[i2 + 1] = xw[1];
627           x[i2 + 2] = xw[2];
628           idiag -= 9;
629           i2 -= 3;
630         }
631         break;
632       case 4:
633         s[0] = xb[i2];
634         s[1] = xb[i2 + 1];
635         s[2] = xb[i2 + 2];
636         s[3] = xb[i2 + 3];
637         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
638         x[i2]     = xw[0];
639         x[i2 + 1] = xw[1];
640         x[i2 + 2] = xw[2];
641         x[i2 + 3] = xw[3];
642         i2 -= 4;
643         idiag -= 16;
644         for (i = m - 2; i >= 0; i--) {
645           v    = aa + 16 * (diag[i] + 1);
646           vi   = aj + diag[i] + 1;
647           nz   = ai[i + 1] - diag[i] - 1;
648           s[0] = xb[i2];
649           s[1] = xb[i2 + 1];
650           s[2] = xb[i2 + 2];
651           s[3] = xb[i2 + 3];
652           while (nz--) {
653             idx   = 4 * (*vi++);
654             xw[0] = x[idx];
655             xw[1] = x[1 + idx];
656             xw[2] = x[2 + idx];
657             xw[3] = x[3 + idx];
658             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
659             v += 16;
660           }
661           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
662           x[i2]     = xw[0];
663           x[i2 + 1] = xw[1];
664           x[i2 + 2] = xw[2];
665           x[i2 + 3] = xw[3];
666           idiag -= 16;
667           i2 -= 4;
668         }
669         break;
670       case 5:
671         s[0] = xb[i2];
672         s[1] = xb[i2 + 1];
673         s[2] = xb[i2 + 2];
674         s[3] = xb[i2 + 3];
675         s[4] = xb[i2 + 4];
676         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
677         x[i2]     = xw[0];
678         x[i2 + 1] = xw[1];
679         x[i2 + 2] = xw[2];
680         x[i2 + 3] = xw[3];
681         x[i2 + 4] = xw[4];
682         i2 -= 5;
683         idiag -= 25;
684         for (i = m - 2; i >= 0; i--) {
685           v    = aa + 25 * (diag[i] + 1);
686           vi   = aj + diag[i] + 1;
687           nz   = ai[i + 1] - diag[i] - 1;
688           s[0] = xb[i2];
689           s[1] = xb[i2 + 1];
690           s[2] = xb[i2 + 2];
691           s[3] = xb[i2 + 3];
692           s[4] = xb[i2 + 4];
693           while (nz--) {
694             idx   = 5 * (*vi++);
695             xw[0] = x[idx];
696             xw[1] = x[1 + idx];
697             xw[2] = x[2 + idx];
698             xw[3] = x[3 + idx];
699             xw[4] = x[4 + idx];
700             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
701             v += 25;
702           }
703           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
704           x[i2]     = xw[0];
705           x[i2 + 1] = xw[1];
706           x[i2 + 2] = xw[2];
707           x[i2 + 3] = xw[3];
708           x[i2 + 4] = xw[4];
709           idiag -= 25;
710           i2 -= 5;
711         }
712         break;
713       case 6:
714         s[0] = xb[i2];
715         s[1] = xb[i2 + 1];
716         s[2] = xb[i2 + 2];
717         s[3] = xb[i2 + 3];
718         s[4] = xb[i2 + 4];
719         s[5] = xb[i2 + 5];
720         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
721         x[i2]     = xw[0];
722         x[i2 + 1] = xw[1];
723         x[i2 + 2] = xw[2];
724         x[i2 + 3] = xw[3];
725         x[i2 + 4] = xw[4];
726         x[i2 + 5] = xw[5];
727         i2 -= 6;
728         idiag -= 36;
729         for (i = m - 2; i >= 0; i--) {
730           v    = aa + 36 * (diag[i] + 1);
731           vi   = aj + diag[i] + 1;
732           nz   = ai[i + 1] - diag[i] - 1;
733           s[0] = xb[i2];
734           s[1] = xb[i2 + 1];
735           s[2] = xb[i2 + 2];
736           s[3] = xb[i2 + 3];
737           s[4] = xb[i2 + 4];
738           s[5] = xb[i2 + 5];
739           while (nz--) {
740             idx   = 6 * (*vi++);
741             xw[0] = x[idx];
742             xw[1] = x[1 + idx];
743             xw[2] = x[2 + idx];
744             xw[3] = x[3 + idx];
745             xw[4] = x[4 + idx];
746             xw[5] = x[5 + idx];
747             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
748             v += 36;
749           }
750           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
751           x[i2]     = xw[0];
752           x[i2 + 1] = xw[1];
753           x[i2 + 2] = xw[2];
754           x[i2 + 3] = xw[3];
755           x[i2 + 4] = xw[4];
756           x[i2 + 5] = xw[5];
757           idiag -= 36;
758           i2 -= 6;
759         }
760         break;
761       case 7:
762         s[0] = xb[i2];
763         s[1] = xb[i2 + 1];
764         s[2] = xb[i2 + 2];
765         s[3] = xb[i2 + 3];
766         s[4] = xb[i2 + 4];
767         s[5] = xb[i2 + 5];
768         s[6] = xb[i2 + 6];
769         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
770         x[i2]     = xw[0];
771         x[i2 + 1] = xw[1];
772         x[i2 + 2] = xw[2];
773         x[i2 + 3] = xw[3];
774         x[i2 + 4] = xw[4];
775         x[i2 + 5] = xw[5];
776         x[i2 + 6] = xw[6];
777         i2 -= 7;
778         idiag -= 49;
779         for (i = m - 2; i >= 0; i--) {
780           v    = aa + 49 * (diag[i] + 1);
781           vi   = aj + diag[i] + 1;
782           nz   = ai[i + 1] - diag[i] - 1;
783           s[0] = xb[i2];
784           s[1] = xb[i2 + 1];
785           s[2] = xb[i2 + 2];
786           s[3] = xb[i2 + 3];
787           s[4] = xb[i2 + 4];
788           s[5] = xb[i2 + 5];
789           s[6] = xb[i2 + 6];
790           while (nz--) {
791             idx   = 7 * (*vi++);
792             xw[0] = x[idx];
793             xw[1] = x[1 + idx];
794             xw[2] = x[2 + idx];
795             xw[3] = x[3 + idx];
796             xw[4] = x[4 + idx];
797             xw[5] = x[5 + idx];
798             xw[6] = x[6 + idx];
799             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
800             v += 49;
801           }
802           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
803           x[i2]     = xw[0];
804           x[i2 + 1] = xw[1];
805           x[i2 + 2] = xw[2];
806           x[i2 + 3] = xw[3];
807           x[i2 + 4] = xw[4];
808           x[i2 + 5] = xw[5];
809           x[i2 + 6] = xw[6];
810           idiag -= 49;
811           i2 -= 7;
812         }
813         break;
814       default:
815         PetscCall(PetscArraycpy(w, xb + i2, bs));
816         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
817         i2 -= bs;
818         idiag -= bs2;
819         for (i = m - 2; i >= 0; i--) {
820           v  = aa + bs2 * (diag[i] + 1);
821           vi = aj + diag[i] + 1;
822           nz = ai[i + 1] - diag[i] - 1;
823 
824           PetscCall(PetscArraycpy(w, xb + i2, bs));
825           /* copy all rows of x that are needed into contiguous space */
826           workt = work;
827           for (j = 0; j < nz; j++) {
828             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
829             workt += bs;
830           }
831           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
832           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
833 
834           idiag -= bs2;
835           i2 -= bs;
836         }
837         break;
838       }
839       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
840     }
841     its--;
842   }
843   while (its--) {
844     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
845       idiag = a->idiag;
846       i2    = 0;
847       switch (bs) {
848       case 1:
849         for (i = 0; i < m; i++) {
850           v    = aa + ai[i];
851           vi   = aj + ai[i];
852           nz   = ai[i + 1] - ai[i];
853           s[0] = b[i2];
854           for (j = 0; j < nz; j++) {
855             xw[0] = x[vi[j]];
856             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
857           }
858           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
859           x[i2] += xw[0];
860           idiag += 1;
861           i2 += 1;
862         }
863         break;
864       case 2:
865         for (i = 0; i < m; i++) {
866           v    = aa + 4 * ai[i];
867           vi   = aj + ai[i];
868           nz   = ai[i + 1] - ai[i];
869           s[0] = b[i2];
870           s[1] = b[i2 + 1];
871           for (j = 0; j < nz; j++) {
872             idx   = 2 * vi[j];
873             it    = 4 * j;
874             xw[0] = x[idx];
875             xw[1] = x[1 + idx];
876             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
877           }
878           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
879           x[i2] += xw[0];
880           x[i2 + 1] += xw[1];
881           idiag += 4;
882           i2 += 2;
883         }
884         break;
885       case 3:
886         for (i = 0; i < m; i++) {
887           v    = aa + 9 * ai[i];
888           vi   = aj + ai[i];
889           nz   = ai[i + 1] - ai[i];
890           s[0] = b[i2];
891           s[1] = b[i2 + 1];
892           s[2] = b[i2 + 2];
893           while (nz--) {
894             idx   = 3 * (*vi++);
895             xw[0] = x[idx];
896             xw[1] = x[1 + idx];
897             xw[2] = x[2 + idx];
898             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
899             v += 9;
900           }
901           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
902           x[i2] += xw[0];
903           x[i2 + 1] += xw[1];
904           x[i2 + 2] += xw[2];
905           idiag += 9;
906           i2 += 3;
907         }
908         break;
909       case 4:
910         for (i = 0; i < m; i++) {
911           v    = aa + 16 * ai[i];
912           vi   = aj + ai[i];
913           nz   = ai[i + 1] - ai[i];
914           s[0] = b[i2];
915           s[1] = b[i2 + 1];
916           s[2] = b[i2 + 2];
917           s[3] = b[i2 + 3];
918           while (nz--) {
919             idx   = 4 * (*vi++);
920             xw[0] = x[idx];
921             xw[1] = x[1 + idx];
922             xw[2] = x[2 + idx];
923             xw[3] = x[3 + idx];
924             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
925             v += 16;
926           }
927           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
928           x[i2] += xw[0];
929           x[i2 + 1] += xw[1];
930           x[i2 + 2] += xw[2];
931           x[i2 + 3] += xw[3];
932           idiag += 16;
933           i2 += 4;
934         }
935         break;
936       case 5:
937         for (i = 0; i < m; i++) {
938           v    = aa + 25 * ai[i];
939           vi   = aj + ai[i];
940           nz   = ai[i + 1] - ai[i];
941           s[0] = b[i2];
942           s[1] = b[i2 + 1];
943           s[2] = b[i2 + 2];
944           s[3] = b[i2 + 3];
945           s[4] = b[i2 + 4];
946           while (nz--) {
947             idx   = 5 * (*vi++);
948             xw[0] = x[idx];
949             xw[1] = x[1 + idx];
950             xw[2] = x[2 + idx];
951             xw[3] = x[3 + idx];
952             xw[4] = x[4 + idx];
953             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
954             v += 25;
955           }
956           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
957           x[i2] += xw[0];
958           x[i2 + 1] += xw[1];
959           x[i2 + 2] += xw[2];
960           x[i2 + 3] += xw[3];
961           x[i2 + 4] += xw[4];
962           idiag += 25;
963           i2 += 5;
964         }
965         break;
966       case 6:
967         for (i = 0; i < m; i++) {
968           v    = aa + 36 * ai[i];
969           vi   = aj + ai[i];
970           nz   = ai[i + 1] - ai[i];
971           s[0] = b[i2];
972           s[1] = b[i2 + 1];
973           s[2] = b[i2 + 2];
974           s[3] = b[i2 + 3];
975           s[4] = b[i2 + 4];
976           s[5] = b[i2 + 5];
977           while (nz--) {
978             idx   = 6 * (*vi++);
979             xw[0] = x[idx];
980             xw[1] = x[1 + idx];
981             xw[2] = x[2 + idx];
982             xw[3] = x[3 + idx];
983             xw[4] = x[4 + idx];
984             xw[5] = x[5 + idx];
985             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
986             v += 36;
987           }
988           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
989           x[i2] += xw[0];
990           x[i2 + 1] += xw[1];
991           x[i2 + 2] += xw[2];
992           x[i2 + 3] += xw[3];
993           x[i2 + 4] += xw[4];
994           x[i2 + 5] += xw[5];
995           idiag += 36;
996           i2 += 6;
997         }
998         break;
999       case 7:
1000         for (i = 0; i < m; i++) {
1001           v    = aa + 49 * ai[i];
1002           vi   = aj + ai[i];
1003           nz   = ai[i + 1] - ai[i];
1004           s[0] = b[i2];
1005           s[1] = b[i2 + 1];
1006           s[2] = b[i2 + 2];
1007           s[3] = b[i2 + 3];
1008           s[4] = b[i2 + 4];
1009           s[5] = b[i2 + 5];
1010           s[6] = b[i2 + 6];
1011           while (nz--) {
1012             idx   = 7 * (*vi++);
1013             xw[0] = x[idx];
1014             xw[1] = x[1 + idx];
1015             xw[2] = x[2 + idx];
1016             xw[3] = x[3 + idx];
1017             xw[4] = x[4 + idx];
1018             xw[5] = x[5 + idx];
1019             xw[6] = x[6 + idx];
1020             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1021             v += 49;
1022           }
1023           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1024           x[i2] += xw[0];
1025           x[i2 + 1] += xw[1];
1026           x[i2 + 2] += xw[2];
1027           x[i2 + 3] += xw[3];
1028           x[i2 + 4] += xw[4];
1029           x[i2 + 5] += xw[5];
1030           x[i2 + 6] += xw[6];
1031           idiag += 49;
1032           i2 += 7;
1033         }
1034         break;
1035       default:
1036         for (i = 0; i < m; i++) {
1037           v  = aa + bs2 * ai[i];
1038           vi = aj + ai[i];
1039           nz = ai[i + 1] - ai[i];
1040 
1041           PetscCall(PetscArraycpy(w, b + i2, bs));
1042           /* copy all rows of x that are needed into contiguous space */
1043           workt = work;
1044           for (j = 0; j < nz; j++) {
1045             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1046             workt += bs;
1047           }
1048           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1049           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1050 
1051           idiag += bs2;
1052           i2 += bs;
1053         }
1054         break;
1055       }
1056       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1057     }
1058     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1059       idiag = a->idiag + bs2 * (a->mbs - 1);
1060       i2    = bs * (m - 1);
1061       switch (bs) {
1062       case 1:
1063         for (i = m - 1; i >= 0; i--) {
1064           v    = aa + ai[i];
1065           vi   = aj + ai[i];
1066           nz   = ai[i + 1] - ai[i];
1067           s[0] = b[i2];
1068           for (j = 0; j < nz; j++) {
1069             xw[0] = x[vi[j]];
1070             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1071           }
1072           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1073           x[i2] += xw[0];
1074           idiag -= 1;
1075           i2 -= 1;
1076         }
1077         break;
1078       case 2:
1079         for (i = m - 1; i >= 0; i--) {
1080           v    = aa + 4 * ai[i];
1081           vi   = aj + ai[i];
1082           nz   = ai[i + 1] - ai[i];
1083           s[0] = b[i2];
1084           s[1] = b[i2 + 1];
1085           for (j = 0; j < nz; j++) {
1086             idx   = 2 * vi[j];
1087             it    = 4 * j;
1088             xw[0] = x[idx];
1089             xw[1] = x[1 + idx];
1090             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1091           }
1092           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
1093           x[i2] += xw[0];
1094           x[i2 + 1] += xw[1];
1095           idiag -= 4;
1096           i2 -= 2;
1097         }
1098         break;
1099       case 3:
1100         for (i = m - 1; i >= 0; i--) {
1101           v    = aa + 9 * ai[i];
1102           vi   = aj + ai[i];
1103           nz   = ai[i + 1] - ai[i];
1104           s[0] = b[i2];
1105           s[1] = b[i2 + 1];
1106           s[2] = b[i2 + 2];
1107           while (nz--) {
1108             idx   = 3 * (*vi++);
1109             xw[0] = x[idx];
1110             xw[1] = x[1 + idx];
1111             xw[2] = x[2 + idx];
1112             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1113             v += 9;
1114           }
1115           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
1116           x[i2] += xw[0];
1117           x[i2 + 1] += xw[1];
1118           x[i2 + 2] += xw[2];
1119           idiag -= 9;
1120           i2 -= 3;
1121         }
1122         break;
1123       case 4:
1124         for (i = m - 1; i >= 0; i--) {
1125           v    = aa + 16 * ai[i];
1126           vi   = aj + ai[i];
1127           nz   = ai[i + 1] - ai[i];
1128           s[0] = b[i2];
1129           s[1] = b[i2 + 1];
1130           s[2] = b[i2 + 2];
1131           s[3] = b[i2 + 3];
1132           while (nz--) {
1133             idx   = 4 * (*vi++);
1134             xw[0] = x[idx];
1135             xw[1] = x[1 + idx];
1136             xw[2] = x[2 + idx];
1137             xw[3] = x[3 + idx];
1138             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1139             v += 16;
1140           }
1141           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
1142           x[i2] += xw[0];
1143           x[i2 + 1] += xw[1];
1144           x[i2 + 2] += xw[2];
1145           x[i2 + 3] += xw[3];
1146           idiag -= 16;
1147           i2 -= 4;
1148         }
1149         break;
1150       case 5:
1151         for (i = m - 1; i >= 0; i--) {
1152           v    = aa + 25 * ai[i];
1153           vi   = aj + ai[i];
1154           nz   = ai[i + 1] - ai[i];
1155           s[0] = b[i2];
1156           s[1] = b[i2 + 1];
1157           s[2] = b[i2 + 2];
1158           s[3] = b[i2 + 3];
1159           s[4] = b[i2 + 4];
1160           while (nz--) {
1161             idx   = 5 * (*vi++);
1162             xw[0] = x[idx];
1163             xw[1] = x[1 + idx];
1164             xw[2] = x[2 + idx];
1165             xw[3] = x[3 + idx];
1166             xw[4] = x[4 + idx];
1167             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1168             v += 25;
1169           }
1170           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
1171           x[i2] += xw[0];
1172           x[i2 + 1] += xw[1];
1173           x[i2 + 2] += xw[2];
1174           x[i2 + 3] += xw[3];
1175           x[i2 + 4] += xw[4];
1176           idiag -= 25;
1177           i2 -= 5;
1178         }
1179         break;
1180       case 6:
1181         for (i = m - 1; i >= 0; i--) {
1182           v    = aa + 36 * ai[i];
1183           vi   = aj + ai[i];
1184           nz   = ai[i + 1] - ai[i];
1185           s[0] = b[i2];
1186           s[1] = b[i2 + 1];
1187           s[2] = b[i2 + 2];
1188           s[3] = b[i2 + 3];
1189           s[4] = b[i2 + 4];
1190           s[5] = b[i2 + 5];
1191           while (nz--) {
1192             idx   = 6 * (*vi++);
1193             xw[0] = x[idx];
1194             xw[1] = x[1 + idx];
1195             xw[2] = x[2 + idx];
1196             xw[3] = x[3 + idx];
1197             xw[4] = x[4 + idx];
1198             xw[5] = x[5 + idx];
1199             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1200             v += 36;
1201           }
1202           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
1203           x[i2] += xw[0];
1204           x[i2 + 1] += xw[1];
1205           x[i2 + 2] += xw[2];
1206           x[i2 + 3] += xw[3];
1207           x[i2 + 4] += xw[4];
1208           x[i2 + 5] += xw[5];
1209           idiag -= 36;
1210           i2 -= 6;
1211         }
1212         break;
1213       case 7:
1214         for (i = m - 1; i >= 0; i--) {
1215           v    = aa + 49 * ai[i];
1216           vi   = aj + ai[i];
1217           nz   = ai[i + 1] - ai[i];
1218           s[0] = b[i2];
1219           s[1] = b[i2 + 1];
1220           s[2] = b[i2 + 2];
1221           s[3] = b[i2 + 3];
1222           s[4] = b[i2 + 4];
1223           s[5] = b[i2 + 5];
1224           s[6] = b[i2 + 6];
1225           while (nz--) {
1226             idx   = 7 * (*vi++);
1227             xw[0] = x[idx];
1228             xw[1] = x[1 + idx];
1229             xw[2] = x[2 + idx];
1230             xw[3] = x[3 + idx];
1231             xw[4] = x[4 + idx];
1232             xw[5] = x[5 + idx];
1233             xw[6] = x[6 + idx];
1234             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1235             v += 49;
1236           }
1237           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1238           x[i2] += xw[0];
1239           x[i2 + 1] += xw[1];
1240           x[i2 + 2] += xw[2];
1241           x[i2 + 3] += xw[3];
1242           x[i2 + 4] += xw[4];
1243           x[i2 + 5] += xw[5];
1244           x[i2 + 6] += xw[6];
1245           idiag -= 49;
1246           i2 -= 7;
1247         }
1248         break;
1249       default:
1250         for (i = m - 1; i >= 0; i--) {
1251           v  = aa + bs2 * ai[i];
1252           vi = aj + ai[i];
1253           nz = ai[i + 1] - ai[i];
1254 
1255           PetscCall(PetscArraycpy(w, b + i2, bs));
1256           /* copy all rows of x that are needed into contiguous space */
1257           workt = work;
1258           for (j = 0; j < nz; j++) {
1259             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1260             workt += bs;
1261           }
1262           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1263           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1264 
1265           idiag -= bs2;
1266           i2 -= bs;
1267         }
1268         break;
1269       }
1270       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1271     }
1272   }
1273   PetscCall(VecRestoreArray(xx, &x));
1274   PetscCall(VecRestoreArrayRead(bb, &b));
1275   PetscFunctionReturn(PETSC_SUCCESS);
1276 }
1277 
1278 /*
1279     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1280 */
1281 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1282   #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1283 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1284   #define matsetvaluesblocked4_ matsetvaluesblocked4
1285 #endif
1286 
1287 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1288 {
1289   Mat                A = *AA;
1290   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1291   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1292   PetscInt          *ai = a->i, *ailen = a->ilen;
1293   PetscInt          *aj = a->j, stepval, lastcol = -1;
1294   const PetscScalar *value = v;
1295   MatScalar         *ap, *aa = a->a, *bap;
1296 
1297   PetscFunctionBegin;
1298   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1299   stepval = (n - 1) * 4;
1300   for (k = 0; k < m; k++) { /* loop over added rows */
1301     row  = im[k];
1302     rp   = aj + ai[row];
1303     ap   = aa + 16 * ai[row];
1304     nrow = ailen[row];
1305     low  = 0;
1306     high = nrow;
1307     for (l = 0; l < n; l++) { /* loop over added columns */
1308       col = in[l];
1309       if (col <= lastcol) low = 0;
1310       else high = nrow;
1311       lastcol = col;
1312       value   = v + k * (stepval + 4 + l) * 4;
1313       while (high - low > 7) {
1314         t = (low + high) / 2;
1315         if (rp[t] > col) high = t;
1316         else low = t;
1317       }
1318       for (i = low; i < high; i++) {
1319         if (rp[i] > col) break;
1320         if (rp[i] == col) {
1321           bap = ap + 16 * i;
1322           for (ii = 0; ii < 4; ii++, value += stepval) {
1323             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1324           }
1325           goto noinsert2;
1326         }
1327       }
1328       N = nrow++ - 1;
1329       high++; /* added new column index thus must search to one higher than before */
1330       /* shift up all the later entries in this row */
1331       for (ii = N; ii >= i; ii--) {
1332         rp[ii + 1] = rp[ii];
1333         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1334       }
1335       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1336       rp[i] = col;
1337       bap   = ap + 16 * i;
1338       for (ii = 0; ii < 4; ii++, value += stepval) {
1339         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1340       }
1341     noinsert2:;
1342       low = i;
1343     }
1344     ailen[row] = nrow;
1345   }
1346   PetscFunctionReturnVoid();
1347 }
1348 
1349 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1350   #define matsetvalues4_ MATSETVALUES4
1351 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1352   #define matsetvalues4_ matsetvalues4
1353 #endif
1354 
1355 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1356 {
1357   Mat          A = *AA;
1358   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1359   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1360   PetscInt    *ai = a->i, *ailen = a->ilen;
1361   PetscInt    *aj = a->j, brow, bcol;
1362   PetscInt     ridx, cidx, lastcol = -1;
1363   MatScalar   *ap, value, *aa      = a->a, *bap;
1364 
1365   PetscFunctionBegin;
1366   for (k = 0; k < m; k++) { /* loop over added rows */
1367     row  = im[k];
1368     brow = row / 4;
1369     rp   = aj + ai[brow];
1370     ap   = aa + 16 * ai[brow];
1371     nrow = ailen[brow];
1372     low  = 0;
1373     high = nrow;
1374     for (l = 0; l < n; l++) { /* loop over added columns */
1375       col   = in[l];
1376       bcol  = col / 4;
1377       ridx  = row % 4;
1378       cidx  = col % 4;
1379       value = v[l + k * n];
1380       if (col <= lastcol) low = 0;
1381       else high = nrow;
1382       lastcol = col;
1383       while (high - low > 7) {
1384         t = (low + high) / 2;
1385         if (rp[t] > bcol) high = t;
1386         else low = t;
1387       }
1388       for (i = low; i < high; i++) {
1389         if (rp[i] > bcol) break;
1390         if (rp[i] == bcol) {
1391           bap = ap + 16 * i + 4 * cidx + ridx;
1392           *bap += value;
1393           goto noinsert1;
1394         }
1395       }
1396       N = nrow++ - 1;
1397       high++; /* added new column thus must search to one higher than before */
1398       /* shift up all the later entries in this row */
1399       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
1400       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
1401       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1402       rp[i]                        = bcol;
1403       ap[16 * i + 4 * cidx + ridx] = value;
1404     noinsert1:;
1405       low = i;
1406     }
1407     ailen[brow] = nrow;
1408   }
1409   PetscFunctionReturnVoid();
1410 }
1411 
1412 /*
1413      Checks for missing diagonals
1414 */
1415 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1416 {
1417   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1418   PetscInt    *diag, *ii = a->i, i;
1419 
1420   PetscFunctionBegin;
1421   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
1422   *missing = PETSC_FALSE;
1423   if (A->rmap->n > 0 && !ii) {
1424     *missing = PETSC_TRUE;
1425     if (d) *d = 0;
1426     PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
1427   } else {
1428     PetscInt n;
1429     n    = PetscMin(a->mbs, a->nbs);
1430     diag = a->diag;
1431     for (i = 0; i < n; i++) {
1432       if (diag[i] >= ii[i + 1]) {
1433         *missing = PETSC_TRUE;
1434         if (d) *d = i;
1435         PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i));
1436         break;
1437       }
1438     }
1439   }
1440   PetscFunctionReturn(PETSC_SUCCESS);
1441 }
1442 
1443 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1444 {
1445   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1446   PetscInt     i, j, m = a->mbs;
1447 
1448   PetscFunctionBegin;
1449   if (!a->diag) {
1450     PetscCall(PetscMalloc1(m, &a->diag));
1451     a->free_diag = PETSC_TRUE;
1452   }
1453   for (i = 0; i < m; i++) {
1454     a->diag[i] = a->i[i + 1];
1455     for (j = a->i[i]; j < a->i[i + 1]; j++) {
1456       if (a->j[j] == i) {
1457         a->diag[i] = j;
1458         break;
1459       }
1460     }
1461   }
1462   PetscFunctionReturn(PETSC_SUCCESS);
1463 }
1464 
1465 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1466 {
1467   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1468   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
1469   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
1470 
1471   PetscFunctionBegin;
1472   *nn = n;
1473   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1474   if (symmetric) {
1475     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1476     nz = tia[n];
1477   } else {
1478     tia = a->i;
1479     tja = a->j;
1480   }
1481 
1482   if (!blockcompressed && bs > 1) {
1483     (*nn) *= bs;
1484     /* malloc & create the natural set of indices */
1485     PetscCall(PetscMalloc1((n + 1) * bs, ia));
1486     if (n) {
1487       (*ia)[0] = oshift;
1488       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
1489     }
1490 
1491     for (i = 1; i < n; i++) {
1492       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1493       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
1494     }
1495     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1496 
1497     if (inja) {
1498       PetscCall(PetscMalloc1(nz * bs * bs, ja));
1499       cnt = 0;
1500       for (i = 0; i < n; i++) {
1501         for (j = 0; j < bs; j++) {
1502           for (k = tia[i]; k < tia[i + 1]; k++) {
1503             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
1504           }
1505         }
1506       }
1507     }
1508 
1509     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1510       PetscCall(PetscFree(tia));
1511       PetscCall(PetscFree(tja));
1512     }
1513   } else if (oshift == 1) {
1514     if (symmetric) {
1515       nz = tia[A->rmap->n / bs];
1516       /*  add 1 to i and j indices */
1517       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1518       *ia = tia;
1519       if (ja) {
1520         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1521         *ja = tja;
1522       }
1523     } else {
1524       nz = a->i[A->rmap->n / bs];
1525       /* malloc space and  add 1 to i and j indices */
1526       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1527       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1528       if (ja) {
1529         PetscCall(PetscMalloc1(nz, ja));
1530         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1531       }
1532     }
1533   } else {
1534     *ia = tia;
1535     if (ja) *ja = tja;
1536   }
1537   PetscFunctionReturn(PETSC_SUCCESS);
1538 }
1539 
1540 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1541 {
1542   PetscFunctionBegin;
1543   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1544   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1545     PetscCall(PetscFree(*ia));
1546     if (ja) PetscCall(PetscFree(*ja));
1547   }
1548   PetscFunctionReturn(PETSC_SUCCESS);
1549 }
1550 
1551 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1552 {
1553   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1554 
1555   PetscFunctionBegin;
1556   if (A->hash_active) {
1557     PetscInt bs;
1558     A->ops[0] = a->cops;
1559     PetscCall(PetscHMapIJVDestroy(&a->ht));
1560     PetscCall(MatGetBlockSize(A, &bs));
1561     if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht));
1562     PetscCall(PetscFree(a->dnz));
1563     PetscCall(PetscFree(a->bdnz));
1564     A->hash_active = PETSC_FALSE;
1565   }
1566   PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
1567   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
1568   PetscCall(ISDestroy(&a->row));
1569   PetscCall(ISDestroy(&a->col));
1570   if (a->free_diag) PetscCall(PetscFree(a->diag));
1571   PetscCall(PetscFree(a->idiag));
1572   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
1573   PetscCall(PetscFree(a->solve_work));
1574   PetscCall(PetscFree(a->mult_work));
1575   PetscCall(PetscFree(a->sor_workt));
1576   PetscCall(PetscFree(a->sor_work));
1577   PetscCall(ISDestroy(&a->icol));
1578   PetscCall(PetscFree(a->saved_values));
1579   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1580 
1581   PetscCall(MatDestroy(&a->sbaijMat));
1582   PetscCall(MatDestroy(&a->parent));
1583   PetscCall(PetscFree(A->data));
1584 
1585   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
1586   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
1587   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
1588   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
1589   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
1590   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
1591   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
1592   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
1593   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
1594   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
1595   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
1596   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
1597 #if defined(PETSC_HAVE_HYPRE)
1598   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
1599 #endif
1600   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
1601   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1602   PetscFunctionReturn(PETSC_SUCCESS);
1603 }
1604 
1605 static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1606 {
1607   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1608 
1609   PetscFunctionBegin;
1610   switch (op) {
1611   case MAT_ROW_ORIENTED:
1612     a->roworiented = flg;
1613     break;
1614   case MAT_KEEP_NONZERO_PATTERN:
1615     a->keepnonzeropattern = flg;
1616     break;
1617   case MAT_NEW_NONZERO_LOCATIONS:
1618     a->nonew = (flg ? 0 : 1);
1619     break;
1620   case MAT_NEW_NONZERO_LOCATION_ERR:
1621     a->nonew = (flg ? -1 : 0);
1622     break;
1623   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1624     a->nonew = (flg ? -2 : 0);
1625     break;
1626   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1627     a->nounused = (flg ? -1 : 0);
1628     break;
1629   case MAT_FORCE_DIAGONAL_ENTRIES:
1630   case MAT_IGNORE_OFF_PROC_ENTRIES:
1631   case MAT_USE_HASH_TABLE:
1632   case MAT_SORTED_FULL:
1633     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1634     break;
1635   case MAT_SPD:
1636   case MAT_SYMMETRIC:
1637   case MAT_STRUCTURALLY_SYMMETRIC:
1638   case MAT_HERMITIAN:
1639   case MAT_SYMMETRY_ETERNAL:
1640   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1641   case MAT_SUBMAT_SINGLEIS:
1642   case MAT_STRUCTURE_ONLY:
1643   case MAT_SPD_ETERNAL:
1644     /* if the diagonal matrix is square it inherits some of the properties above */
1645     break;
1646   default:
1647     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1648   }
1649   PetscFunctionReturn(PETSC_SUCCESS);
1650 }
1651 
1652 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1653 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1654 {
1655   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
1656   MatScalar   *aa_i;
1657   PetscScalar *v_i;
1658 
1659   PetscFunctionBegin;
1660   bs  = A->rmap->bs;
1661   bs2 = bs * bs;
1662   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1663 
1664   bn  = row / bs; /* Block number */
1665   bp  = row % bs; /* Block Position */
1666   M   = ai[bn + 1] - ai[bn];
1667   *nz = bs * M;
1668 
1669   if (v) {
1670     *v = NULL;
1671     if (*nz) {
1672       PetscCall(PetscMalloc1(*nz, v));
1673       for (i = 0; i < M; i++) { /* for each block in the block row */
1674         v_i  = *v + i * bs;
1675         aa_i = aa + bs2 * (ai[bn] + i);
1676         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
1677       }
1678     }
1679   }
1680 
1681   if (idx) {
1682     *idx = NULL;
1683     if (*nz) {
1684       PetscCall(PetscMalloc1(*nz, idx));
1685       for (i = 0; i < M; i++) { /* for each block in the block row */
1686         idx_i = *idx + i * bs;
1687         itmp  = bs * aj[ai[bn] + i];
1688         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
1689       }
1690     }
1691   }
1692   PetscFunctionReturn(PETSC_SUCCESS);
1693 }
1694 
1695 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1696 {
1697   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1698 
1699   PetscFunctionBegin;
1700   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
1701   PetscFunctionReturn(PETSC_SUCCESS);
1702 }
1703 
1704 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1705 {
1706   PetscFunctionBegin;
1707   if (idx) PetscCall(PetscFree(*idx));
1708   if (v) PetscCall(PetscFree(*v));
1709   PetscFunctionReturn(PETSC_SUCCESS);
1710 }
1711 
1712 static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1713 {
1714   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
1715   Mat          C;
1716   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
1717   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
1718   MatScalar   *ata, *aa = a->a;
1719 
1720   PetscFunctionBegin;
1721   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1722   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1723   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1724     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1725 
1726     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
1727     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
1728     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
1729     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
1730 
1731     at  = (Mat_SeqBAIJ *)C->data;
1732     ati = at->i;
1733     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1734   } else {
1735     C   = *B;
1736     at  = (Mat_SeqBAIJ *)C->data;
1737     ati = at->i;
1738   }
1739 
1740   atj = at->j;
1741   ata = at->a;
1742 
1743   /* Copy ati into atfill so we have locations of the next free space in atj */
1744   PetscCall(PetscArraycpy(atfill, ati, nbs));
1745 
1746   /* Walk through A row-wise and mark nonzero entries of A^T. */
1747   for (i = 0; i < mbs; i++) {
1748     anzj = ai[i + 1] - ai[i];
1749     for (j = 0; j < anzj; j++) {
1750       atj[atfill[*aj]] = i;
1751       for (kr = 0; kr < bs; kr++) {
1752         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
1753       }
1754       atfill[*aj++] += 1;
1755     }
1756   }
1757   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1758   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1759 
1760   /* Clean up temporary space and complete requests. */
1761   PetscCall(PetscFree(atfill));
1762 
1763   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1764     PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1765     *B = C;
1766   } else {
1767     PetscCall(MatHeaderMerge(A, &C));
1768   }
1769   PetscFunctionReturn(PETSC_SUCCESS);
1770 }
1771 
1772 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1773 {
1774   Mat Btrans;
1775 
1776   PetscFunctionBegin;
1777   *f = PETSC_FALSE;
1778   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
1779   PetscCall(MatEqual_SeqBAIJ(B, Btrans, f));
1780   PetscCall(MatDestroy(&Btrans));
1781   PetscFunctionReturn(PETSC_SUCCESS);
1782 }
1783 
1784 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1785 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1786 {
1787   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1788   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1789   PetscInt    *rowlens, *colidxs;
1790   PetscScalar *matvals;
1791 
1792   PetscFunctionBegin;
1793   PetscCall(PetscViewerSetUp(viewer));
1794 
1795   M  = mat->rmap->N;
1796   N  = mat->cmap->N;
1797   m  = mat->rmap->n;
1798   bs = mat->rmap->bs;
1799   nz = bs * bs * A->nz;
1800 
1801   /* write matrix header */
1802   header[0] = MAT_FILE_CLASSID;
1803   header[1] = M;
1804   header[2] = N;
1805   header[3] = nz;
1806   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1807 
1808   /* store row lengths */
1809   PetscCall(PetscMalloc1(m, &rowlens));
1810   for (cnt = 0, i = 0; i < A->mbs; i++)
1811     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
1812   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
1813   PetscCall(PetscFree(rowlens));
1814 
1815   /* store column indices  */
1816   PetscCall(PetscMalloc1(nz, &colidxs));
1817   for (cnt = 0, i = 0; i < A->mbs; i++)
1818     for (k = 0; k < bs; k++)
1819       for (j = A->i[i]; j < A->i[i + 1]; j++)
1820         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
1821   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1822   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
1823   PetscCall(PetscFree(colidxs));
1824 
1825   /* store nonzero values */
1826   PetscCall(PetscMalloc1(nz, &matvals));
1827   for (cnt = 0, i = 0; i < A->mbs; i++)
1828     for (k = 0; k < bs; k++)
1829       for (j = A->i[i]; j < A->i[i + 1]; j++)
1830         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
1831   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1832   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
1833   PetscCall(PetscFree(matvals));
1834 
1835   /* write block size option to the viewer's .info file */
1836   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1837   PetscFunctionReturn(PETSC_SUCCESS);
1838 }
1839 
1840 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1841 {
1842   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1843   PetscInt     i, bs = A->rmap->bs, k;
1844 
1845   PetscFunctionBegin;
1846   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1847   for (i = 0; i < a->mbs; i++) {
1848     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
1849     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
1850     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1851   }
1852   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1853   PetscFunctionReturn(PETSC_SUCCESS);
1854 }
1855 
1856 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1857 {
1858   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1859   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1860   PetscViewerFormat format;
1861 
1862   PetscFunctionBegin;
1863   if (A->structure_only) {
1864     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
1865     PetscFunctionReturn(PETSC_SUCCESS);
1866   }
1867 
1868   PetscCall(PetscViewerGetFormat(viewer, &format));
1869   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1870     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1871   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1872     const char *matname;
1873     Mat         aij;
1874     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
1875     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
1876     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
1877     PetscCall(MatView(aij, viewer));
1878     PetscCall(MatDestroy(&aij));
1879   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1880     PetscFunctionReturn(PETSC_SUCCESS);
1881   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1882     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1883     for (i = 0; i < a->mbs; i++) {
1884       for (j = 0; j < bs; j++) {
1885         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1886         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1887           for (l = 0; l < bs; l++) {
1888 #if defined(PETSC_USE_COMPLEX)
1889             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1890               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1891             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1892               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1893             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1894               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1895             }
1896 #else
1897             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1898 #endif
1899           }
1900         }
1901         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1902       }
1903     }
1904     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1905   } else {
1906     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1907     for (i = 0; i < a->mbs; i++) {
1908       for (j = 0; j < bs; j++) {
1909         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1910         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1911           for (l = 0; l < bs; l++) {
1912 #if defined(PETSC_USE_COMPLEX)
1913             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
1914               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1915             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
1916               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1917             } else {
1918               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1919             }
1920 #else
1921             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1922 #endif
1923           }
1924         }
1925         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1926       }
1927     }
1928     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1929   }
1930   PetscCall(PetscViewerFlush(viewer));
1931   PetscFunctionReturn(PETSC_SUCCESS);
1932 }
1933 
1934 #include <petscdraw.h>
1935 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1936 {
1937   Mat               A = (Mat)Aa;
1938   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1939   PetscInt          row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
1940   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1941   MatScalar        *aa;
1942   PetscViewer       viewer;
1943   PetscViewerFormat format;
1944   int               color;
1945 
1946   PetscFunctionBegin;
1947   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1948   PetscCall(PetscViewerGetFormat(viewer, &format));
1949   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
1950 
1951   /* loop over matrix elements drawing boxes */
1952 
1953   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1954     PetscDrawCollectiveBegin(draw);
1955     /* Blue for negative, Cyan for zero and  Red for positive */
1956     color = PETSC_DRAW_BLUE;
1957     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1958       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1959         y_l = A->rmap->N - row - 1.0;
1960         y_r = y_l + 1.0;
1961         x_l = a->j[j] * bs;
1962         x_r = x_l + 1.0;
1963         aa  = a->a + j * bs2;
1964         for (k = 0; k < bs; k++) {
1965           for (l = 0; l < bs; l++) {
1966             if (PetscRealPart(*aa++) >= 0.) continue;
1967             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1968           }
1969         }
1970       }
1971     }
1972     color = PETSC_DRAW_CYAN;
1973     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1974       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1975         y_l = A->rmap->N - row - 1.0;
1976         y_r = y_l + 1.0;
1977         x_l = a->j[j] * bs;
1978         x_r = x_l + 1.0;
1979         aa  = a->a + j * bs2;
1980         for (k = 0; k < bs; k++) {
1981           for (l = 0; l < bs; l++) {
1982             if (PetscRealPart(*aa++) != 0.) continue;
1983             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1984           }
1985         }
1986       }
1987     }
1988     color = PETSC_DRAW_RED;
1989     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1990       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1991         y_l = A->rmap->N - row - 1.0;
1992         y_r = y_l + 1.0;
1993         x_l = a->j[j] * bs;
1994         x_r = x_l + 1.0;
1995         aa  = a->a + j * bs2;
1996         for (k = 0; k < bs; k++) {
1997           for (l = 0; l < bs; l++) {
1998             if (PetscRealPart(*aa++) <= 0.) continue;
1999             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2000           }
2001         }
2002       }
2003     }
2004     PetscDrawCollectiveEnd(draw);
2005   } else {
2006     /* use contour shading to indicate magnitude of values */
2007     /* first determine max of all nonzero values */
2008     PetscReal minv = 0.0, maxv = 0.0;
2009     PetscDraw popup;
2010 
2011     for (i = 0; i < a->nz * a->bs2; i++) {
2012       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
2013     }
2014     if (minv >= maxv) maxv = minv + PETSC_SMALL;
2015     PetscCall(PetscDrawGetPopup(draw, &popup));
2016     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
2017 
2018     PetscDrawCollectiveBegin(draw);
2019     for (i = 0, row = 0; i < mbs; i++, row += bs) {
2020       for (j = a->i[i]; j < a->i[i + 1]; j++) {
2021         y_l = A->rmap->N - row - 1.0;
2022         y_r = y_l + 1.0;
2023         x_l = a->j[j] * bs;
2024         x_r = x_l + 1.0;
2025         aa  = a->a + j * bs2;
2026         for (k = 0; k < bs; k++) {
2027           for (l = 0; l < bs; l++) {
2028             MatScalar v = *aa++;
2029             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
2030             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2031           }
2032         }
2033       }
2034     }
2035     PetscDrawCollectiveEnd(draw);
2036   }
2037   PetscFunctionReturn(PETSC_SUCCESS);
2038 }
2039 
2040 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2041 {
2042   PetscReal xl, yl, xr, yr, w, h;
2043   PetscDraw draw;
2044   PetscBool isnull;
2045 
2046   PetscFunctionBegin;
2047   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
2048   PetscCall(PetscDrawIsNull(draw, &isnull));
2049   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
2050 
2051   xr = A->cmap->n;
2052   yr = A->rmap->N;
2053   h  = yr / 10.0;
2054   w  = xr / 10.0;
2055   xr += w;
2056   yr += h;
2057   xl = -w;
2058   yl = -h;
2059   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
2060   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
2061   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
2062   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
2063   PetscCall(PetscDrawSave(draw));
2064   PetscFunctionReturn(PETSC_SUCCESS);
2065 }
2066 
2067 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2068 {
2069   PetscBool iascii, isbinary, isdraw;
2070 
2071   PetscFunctionBegin;
2072   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2073   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2074   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
2075   if (iascii) {
2076     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
2077   } else if (isbinary) {
2078     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
2079   } else if (isdraw) {
2080     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
2081   } else {
2082     Mat B;
2083     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2084     PetscCall(MatView(B, viewer));
2085     PetscCall(MatDestroy(&B));
2086   }
2087   PetscFunctionReturn(PETSC_SUCCESS);
2088 }
2089 
2090 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2091 {
2092   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2093   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2094   PetscInt    *ai = a->i, *ailen = a->ilen;
2095   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
2096   MatScalar   *ap, *aa = a->a;
2097 
2098   PetscFunctionBegin;
2099   for (k = 0; k < m; k++) { /* loop over rows */
2100     row  = im[k];
2101     brow = row / bs;
2102     if (row < 0) {
2103       v += n;
2104       continue;
2105     } /* negative row */
2106     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2107     rp   = PetscSafePointerPlusOffset(aj, ai[brow]);
2108     ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2109     nrow = ailen[brow];
2110     for (l = 0; l < n; l++) { /* loop over columns */
2111       if (in[l] < 0) {
2112         v++;
2113         continue;
2114       } /* negative column */
2115       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
2116       col  = in[l];
2117       bcol = col / bs;
2118       cidx = col % bs;
2119       ridx = row % bs;
2120       high = nrow;
2121       low  = 0; /* assume unsorted */
2122       while (high - low > 5) {
2123         t = (low + high) / 2;
2124         if (rp[t] > bcol) high = t;
2125         else low = t;
2126       }
2127       for (i = low; i < high; i++) {
2128         if (rp[i] > bcol) break;
2129         if (rp[i] == bcol) {
2130           *v++ = ap[bs2 * i + bs * cidx + ridx];
2131           goto finished;
2132         }
2133       }
2134       *v++ = 0.0;
2135     finished:;
2136     }
2137   }
2138   PetscFunctionReturn(PETSC_SUCCESS);
2139 }
2140 
2141 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2142 {
2143   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2144   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2145   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2146   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2147   PetscBool          roworiented = a->roworiented;
2148   const PetscScalar *value       = v;
2149   MatScalar         *ap = NULL, *aa = a->a, *bap;
2150 
2151   PetscFunctionBegin;
2152   if (roworiented) {
2153     stepval = (n - 1) * bs;
2154   } else {
2155     stepval = (m - 1) * bs;
2156   }
2157   for (k = 0; k < m; k++) { /* loop over added rows */
2158     row = im[k];
2159     if (row < 0) continue;
2160     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
2161     rp = aj + ai[row];
2162     if (!A->structure_only) ap = aa + bs2 * ai[row];
2163     rmax = imax[row];
2164     nrow = ailen[row];
2165     low  = 0;
2166     high = nrow;
2167     for (l = 0; l < n; l++) { /* loop over added columns */
2168       if (in[l] < 0) continue;
2169       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
2170       col = in[l];
2171       if (!A->structure_only) {
2172         if (roworiented) {
2173           value = v + (k * (stepval + bs) + l) * bs;
2174         } else {
2175           value = v + (l * (stepval + bs) + k) * bs;
2176         }
2177       }
2178       if (col <= lastcol) low = 0;
2179       else high = nrow;
2180       lastcol = col;
2181       while (high - low > 7) {
2182         t = (low + high) / 2;
2183         if (rp[t] > col) high = t;
2184         else low = t;
2185       }
2186       for (i = low; i < high; i++) {
2187         if (rp[i] > col) break;
2188         if (rp[i] == col) {
2189           if (A->structure_only) goto noinsert2;
2190           bap = ap + bs2 * i;
2191           if (roworiented) {
2192             if (is == ADD_VALUES) {
2193               for (ii = 0; ii < bs; ii++, value += stepval) {
2194                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2195               }
2196             } else {
2197               for (ii = 0; ii < bs; ii++, value += stepval) {
2198                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2199               }
2200             }
2201           } else {
2202             if (is == ADD_VALUES) {
2203               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2204                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
2205                 bap += bs;
2206               }
2207             } else {
2208               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2209                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
2210                 bap += bs;
2211               }
2212             }
2213           }
2214           goto noinsert2;
2215         }
2216       }
2217       if (nonew == 1) goto noinsert2;
2218       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2219       if (A->structure_only) {
2220         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
2221       } else {
2222         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2223       }
2224       N = nrow++ - 1;
2225       high++;
2226       /* shift up all the later entries in this row */
2227       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2228       rp[i] = col;
2229       if (!A->structure_only) {
2230         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2231         bap = ap + bs2 * i;
2232         if (roworiented) {
2233           for (ii = 0; ii < bs; ii++, value += stepval) {
2234             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2235           }
2236         } else {
2237           for (ii = 0; ii < bs; ii++, value += stepval) {
2238             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2239           }
2240         }
2241       }
2242     noinsert2:;
2243       low = i;
2244     }
2245     ailen[row] = nrow;
2246   }
2247   PetscFunctionReturn(PETSC_SUCCESS);
2248 }
2249 
2250 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2251 {
2252   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2253   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2254   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2255   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2256   MatScalar   *aa    = a->a, *ap;
2257   PetscReal    ratio = 0.6;
2258 
2259   PetscFunctionBegin;
2260   if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS);
2261 
2262   if (m) rmax = ailen[0];
2263   for (i = 1; i < mbs; i++) {
2264     /* move each row back by the amount of empty slots (fshift) before it*/
2265     fshift += imax[i - 1] - ailen[i - 1];
2266     rmax = PetscMax(rmax, ailen[i]);
2267     if (fshift) {
2268       ip = aj + ai[i];
2269       ap = aa + bs2 * ai[i];
2270       N  = ailen[i];
2271       PetscCall(PetscArraymove(ip - fshift, ip, N));
2272       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2273     }
2274     ai[i] = ai[i - 1] + ailen[i - 1];
2275   }
2276   if (mbs) {
2277     fshift += imax[mbs - 1] - ailen[mbs - 1];
2278     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2279   }
2280 
2281   /* reset ilen and imax for each row */
2282   a->nonzerorowcnt = 0;
2283   if (A->structure_only) {
2284     PetscCall(PetscFree2(a->imax, a->ilen));
2285   } else { /* !A->structure_only */
2286     for (i = 0; i < mbs; i++) {
2287       ailen[i] = imax[i] = ai[i + 1] - ai[i];
2288       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2289     }
2290   }
2291   a->nz = ai[mbs];
2292 
2293   /* diagonals may have moved, so kill the diagonal pointers */
2294   a->idiagvalid = PETSC_FALSE;
2295   if (fshift && a->diag) PetscCall(PetscFree(a->diag));
2296   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
2297   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
2298   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
2299   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
2300 
2301   A->info.mallocs += a->reallocs;
2302   a->reallocs         = 0;
2303   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2304   a->rmax             = rmax;
2305 
2306   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
2307   PetscFunctionReturn(PETSC_SUCCESS);
2308 }
2309 
2310 /*
2311    This function returns an array of flags which indicate the locations of contiguous
2312    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2313    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2314    Assume: sizes should be long enough to hold all the values.
2315 */
2316 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2317 {
2318   PetscInt j = 0;
2319 
2320   PetscFunctionBegin;
2321   for (PetscInt i = 0; i < n; j++) {
2322     PetscInt row = idx[i];
2323     if (row % bs != 0) { /* Not the beginning of a block */
2324       sizes[j] = 1;
2325       i++;
2326     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2327       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2328       i++;
2329     } else { /* Beginning of the block, so check if the complete block exists */
2330       PetscBool flg = PETSC_TRUE;
2331       for (PetscInt k = 1; k < bs; k++) {
2332         if (row + k != idx[i + k]) { /* break in the block */
2333           flg = PETSC_FALSE;
2334           break;
2335         }
2336       }
2337       if (flg) { /* No break in the bs */
2338         sizes[j] = bs;
2339         i += bs;
2340       } else {
2341         sizes[j] = 1;
2342         i++;
2343       }
2344     }
2345   }
2346   *bs_max = j;
2347   PetscFunctionReturn(PETSC_SUCCESS);
2348 }
2349 
2350 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2351 {
2352   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2353   PetscInt           i, j, k, count, *rows;
2354   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
2355   PetscScalar        zero = 0.0;
2356   MatScalar         *aa;
2357   const PetscScalar *xx;
2358   PetscScalar       *bb;
2359 
2360   PetscFunctionBegin;
2361   /* fix right-hand side if needed */
2362   if (x && b) {
2363     PetscCall(VecGetArrayRead(x, &xx));
2364     PetscCall(VecGetArray(b, &bb));
2365     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
2366     PetscCall(VecRestoreArrayRead(x, &xx));
2367     PetscCall(VecRestoreArray(b, &bb));
2368   }
2369 
2370   /* Make a copy of the IS and  sort it */
2371   /* allocate memory for rows,sizes */
2372   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2373 
2374   /* copy IS values to rows, and sort them */
2375   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
2376   PetscCall(PetscSortInt(is_n, rows));
2377 
2378   if (baij->keepnonzeropattern) {
2379     for (i = 0; i < is_n; i++) sizes[i] = 1;
2380     bs_max = is_n;
2381   } else {
2382     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2383     A->nonzerostate++;
2384   }
2385 
2386   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2387     row = rows[j];
2388     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2389     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2390     aa    = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs);
2391     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2392       if (diag != (PetscScalar)0.0) {
2393         if (baij->ilen[row / bs] > 0) {
2394           baij->ilen[row / bs]       = 1;
2395           baij->j[baij->i[row / bs]] = row / bs;
2396 
2397           PetscCall(PetscArrayzero(aa, count * bs));
2398         }
2399         /* Now insert all the diagonal values for this bs */
2400         for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES);
2401       } else { /* (diag == 0.0) */
2402         baij->ilen[row / bs] = 0;
2403       } /* end (diag == 0.0) */
2404     } else { /* (sizes[i] != bs) */
2405       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2406       for (k = 0; k < count; k++) {
2407         aa[0] = zero;
2408         aa += bs;
2409       }
2410       if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES);
2411     }
2412   }
2413 
2414   PetscCall(PetscFree2(rows, sizes));
2415   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2416   PetscFunctionReturn(PETSC_SUCCESS);
2417 }
2418 
2419 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2420 {
2421   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2422   PetscInt           i, j, k, count;
2423   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
2424   PetscScalar        zero = 0.0;
2425   MatScalar         *aa;
2426   const PetscScalar *xx;
2427   PetscScalar       *bb;
2428   PetscBool         *zeroed, vecs = PETSC_FALSE;
2429 
2430   PetscFunctionBegin;
2431   /* fix right-hand side if needed */
2432   if (x && b) {
2433     PetscCall(VecGetArrayRead(x, &xx));
2434     PetscCall(VecGetArray(b, &bb));
2435     vecs = PETSC_TRUE;
2436   }
2437 
2438   /* zero the columns */
2439   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
2440   for (i = 0; i < is_n; i++) {
2441     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
2442     zeroed[is_idx[i]] = PETSC_TRUE;
2443   }
2444   for (i = 0; i < A->rmap->N; i++) {
2445     if (!zeroed[i]) {
2446       row = i / bs;
2447       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
2448         for (k = 0; k < bs; k++) {
2449           col = bs * baij->j[j] + k;
2450           if (zeroed[col]) {
2451             aa = ((MatScalar *)baij->a) + j * bs2 + (i % bs) + bs * k;
2452             if (vecs) bb[i] -= aa[0] * xx[col];
2453             aa[0] = 0.0;
2454           }
2455         }
2456       }
2457     } else if (vecs) bb[i] = diag * xx[i];
2458   }
2459   PetscCall(PetscFree(zeroed));
2460   if (vecs) {
2461     PetscCall(VecRestoreArrayRead(x, &xx));
2462     PetscCall(VecRestoreArray(b, &bb));
2463   }
2464 
2465   /* zero the rows */
2466   for (i = 0; i < is_n; i++) {
2467     row   = is_idx[i];
2468     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2469     aa    = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs);
2470     for (k = 0; k < count; k++) {
2471       aa[0] = zero;
2472       aa += bs;
2473     }
2474     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
2475   }
2476   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2477   PetscFunctionReturn(PETSC_SUCCESS);
2478 }
2479 
2480 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2481 {
2482   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2483   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2484   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2485   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2486   PetscInt     ridx, cidx, bs2                 = a->bs2;
2487   PetscBool    roworiented = a->roworiented;
2488   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
2489 
2490   PetscFunctionBegin;
2491   for (k = 0; k < m; k++) { /* loop over added rows */
2492     row  = im[k];
2493     brow = row / bs;
2494     if (row < 0) continue;
2495     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
2496     rp = PetscSafePointerPlusOffset(aj, ai[brow]);
2497     if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2498     rmax = imax[brow];
2499     nrow = ailen[brow];
2500     low  = 0;
2501     high = nrow;
2502     for (l = 0; l < n; l++) { /* loop over added columns */
2503       if (in[l] < 0) continue;
2504       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
2505       col  = in[l];
2506       bcol = col / bs;
2507       ridx = row % bs;
2508       cidx = col % bs;
2509       if (!A->structure_only) {
2510         if (roworiented) {
2511           value = v[l + k * n];
2512         } else {
2513           value = v[k + l * m];
2514         }
2515       }
2516       if (col <= lastcol) low = 0;
2517       else high = nrow;
2518       lastcol = col;
2519       while (high - low > 7) {
2520         t = (low + high) / 2;
2521         if (rp[t] > bcol) high = t;
2522         else low = t;
2523       }
2524       for (i = low; i < high; i++) {
2525         if (rp[i] > bcol) break;
2526         if (rp[i] == bcol) {
2527           bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx);
2528           if (!A->structure_only) {
2529             if (is == ADD_VALUES) *bap += value;
2530             else *bap = value;
2531           }
2532           goto noinsert1;
2533         }
2534       }
2535       if (nonew == 1) goto noinsert1;
2536       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2537       if (A->structure_only) {
2538         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2539       } else {
2540         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2541       }
2542       N = nrow++ - 1;
2543       high++;
2544       /* shift up all the later entries in this row */
2545       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2546       rp[i] = bcol;
2547       if (!A->structure_only) {
2548         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2549         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2550         ap[bs2 * i + bs * cidx + ridx] = value;
2551       }
2552       a->nz++;
2553     noinsert1:;
2554       low = i;
2555     }
2556     ailen[brow] = nrow;
2557   }
2558   PetscFunctionReturn(PETSC_SUCCESS);
2559 }
2560 
2561 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2562 {
2563   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
2564   Mat          outA;
2565   PetscBool    row_identity, col_identity;
2566 
2567   PetscFunctionBegin;
2568   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
2569   PetscCall(ISIdentity(row, &row_identity));
2570   PetscCall(ISIdentity(col, &col_identity));
2571   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
2572 
2573   outA            = inA;
2574   inA->factortype = MAT_FACTOR_LU;
2575   PetscCall(PetscFree(inA->solvertype));
2576   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
2577 
2578   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2579 
2580   PetscCall(PetscObjectReference((PetscObject)row));
2581   PetscCall(ISDestroy(&a->row));
2582   a->row = row;
2583   PetscCall(PetscObjectReference((PetscObject)col));
2584   PetscCall(ISDestroy(&a->col));
2585   a->col = col;
2586 
2587   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2588   PetscCall(ISDestroy(&a->icol));
2589   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2590 
2591   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2592   if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
2593   PetscCall(MatLUFactorNumeric(outA, inA, info));
2594   PetscFunctionReturn(PETSC_SUCCESS);
2595 }
2596 
2597 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2598 {
2599   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2600 
2601   PetscFunctionBegin;
2602   baij->nz = baij->maxnz;
2603   PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2604   PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
2605   PetscFunctionReturn(PETSC_SUCCESS);
2606 }
2607 
2608 /*@
2609   MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix.
2610 
2611   Input Parameters:
2612 + mat     - the `MATSEQBAIJ` matrix
2613 - indices - the block column indices
2614 
2615   Level: advanced
2616 
2617   Notes:
2618   This can be called if you have precomputed the nonzero structure of the
2619   matrix and want to provide it to the matrix object to improve the performance
2620   of the `MatSetValues()` operation.
2621 
2622   You MUST have set the correct numbers of nonzeros per row in the call to
2623   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
2624 
2625   MUST be called before any calls to `MatSetValues()`
2626 
2627 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()`
2628 @*/
2629 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2630 {
2631   PetscFunctionBegin;
2632   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2633   PetscAssertPointer(indices, 2);
2634   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices));
2635   PetscFunctionReturn(PETSC_SUCCESS);
2636 }
2637 
2638 static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2639 {
2640   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2641   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2642   PetscReal    atmp;
2643   PetscScalar *x, zero = 0.0;
2644   MatScalar   *aa;
2645   PetscInt     ncols, brow, krow, kcol;
2646 
2647   PetscFunctionBegin;
2648   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2649   bs  = A->rmap->bs;
2650   aa  = a->a;
2651   ai  = a->i;
2652   aj  = a->j;
2653   mbs = a->mbs;
2654 
2655   PetscCall(VecSet(v, zero));
2656   PetscCall(VecGetArray(v, &x));
2657   PetscCall(VecGetLocalSize(v, &n));
2658   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2659   for (i = 0; i < mbs; i++) {
2660     ncols = ai[1] - ai[0];
2661     ai++;
2662     brow = bs * i;
2663     for (j = 0; j < ncols; j++) {
2664       for (kcol = 0; kcol < bs; kcol++) {
2665         for (krow = 0; krow < bs; krow++) {
2666           atmp = PetscAbsScalar(*aa);
2667           aa++;
2668           row = brow + krow; /* row index */
2669           if (PetscAbsScalar(x[row]) < atmp) {
2670             x[row] = atmp;
2671             if (idx) idx[row] = bs * (*aj) + kcol;
2672           }
2673         }
2674       }
2675       aj++;
2676     }
2677   }
2678   PetscCall(VecRestoreArray(v, &x));
2679   PetscFunctionReturn(PETSC_SUCCESS);
2680 }
2681 
2682 static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v)
2683 {
2684   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2685   PetscInt     i, j, n, row, bs, *ai, mbs;
2686   PetscReal    atmp;
2687   PetscScalar *x, zero = 0.0;
2688   MatScalar   *aa;
2689   PetscInt     ncols, brow, krow, kcol;
2690 
2691   PetscFunctionBegin;
2692   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2693   bs  = A->rmap->bs;
2694   aa  = a->a;
2695   ai  = a->i;
2696   mbs = a->mbs;
2697 
2698   PetscCall(VecSet(v, zero));
2699   PetscCall(VecGetArrayWrite(v, &x));
2700   PetscCall(VecGetLocalSize(v, &n));
2701   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2702   for (i = 0; i < mbs; i++) {
2703     ncols = ai[1] - ai[0];
2704     ai++;
2705     brow = bs * i;
2706     for (j = 0; j < ncols; j++) {
2707       for (kcol = 0; kcol < bs; kcol++) {
2708         for (krow = 0; krow < bs; krow++) {
2709           atmp = PetscAbsScalar(*aa);
2710           aa++;
2711           row = brow + krow; /* row index */
2712           x[row] += atmp;
2713         }
2714       }
2715     }
2716   }
2717   PetscCall(VecRestoreArrayWrite(v, &x));
2718   PetscFunctionReturn(PETSC_SUCCESS);
2719 }
2720 
2721 static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2722 {
2723   PetscFunctionBegin;
2724   /* If the two matrices have the same copy implementation, use fast copy. */
2725   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2726     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
2727     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2728     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
2729 
2730     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
2731     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
2732     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
2733     PetscCall(PetscObjectStateIncrease((PetscObject)B));
2734   } else {
2735     PetscCall(MatCopy_Basic(A, B, str));
2736   }
2737   PetscFunctionReturn(PETSC_SUCCESS);
2738 }
2739 
2740 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2741 {
2742   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2743 
2744   PetscFunctionBegin;
2745   *array = a->a;
2746   PetscFunctionReturn(PETSC_SUCCESS);
2747 }
2748 
2749 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2750 {
2751   PetscFunctionBegin;
2752   *array = NULL;
2753   PetscFunctionReturn(PETSC_SUCCESS);
2754 }
2755 
2756 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2757 {
2758   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
2759   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
2760   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
2761 
2762   PetscFunctionBegin;
2763   /* Set the number of nonzeros in the new matrix */
2764   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
2765   PetscFunctionReturn(PETSC_SUCCESS);
2766 }
2767 
2768 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2769 {
2770   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
2771   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2772   PetscBLASInt one = 1;
2773 
2774   PetscFunctionBegin;
2775   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2776     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2777     if (e) {
2778       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2779       if (e) {
2780         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2781         if (e) str = SAME_NONZERO_PATTERN;
2782       }
2783     }
2784     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2785   }
2786   if (str == SAME_NONZERO_PATTERN) {
2787     PetscScalar  alpha = a;
2788     PetscBLASInt bnz;
2789     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2790     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
2791     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2792   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2793     PetscCall(MatAXPY_Basic(Y, a, X, str));
2794   } else {
2795     Mat       B;
2796     PetscInt *nnz;
2797     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
2798     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
2799     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2800     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2801     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
2802     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
2803     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
2804     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
2805     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
2806     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2807     PetscCall(MatHeaderMerge(Y, &B));
2808     PetscCall(PetscFree(nnz));
2809   }
2810   PetscFunctionReturn(PETSC_SUCCESS);
2811 }
2812 
2813 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2814 {
2815 #if PetscDefined(USE_COMPLEX)
2816   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2817   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2818   MatScalar   *aa = a->a;
2819 
2820   PetscFunctionBegin;
2821   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
2822   PetscFunctionReturn(PETSC_SUCCESS);
2823 #else
2824   (void)A;
2825   return PETSC_SUCCESS;
2826 #endif
2827 }
2828 
2829 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2830 {
2831 #if PetscDefined(USE_COMPLEX)
2832   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2833   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2834   MatScalar   *aa = a->a;
2835 
2836   PetscFunctionBegin;
2837   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
2838   PetscFunctionReturn(PETSC_SUCCESS);
2839 #else
2840   (void)A;
2841   return PETSC_SUCCESS;
2842 #endif
2843 }
2844 
2845 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2846 {
2847 #if PetscDefined(USE_COMPLEX)
2848   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2849   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2850   MatScalar   *aa = a->a;
2851 
2852   PetscFunctionBegin;
2853   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2854   PetscFunctionReturn(PETSC_SUCCESS);
2855 #else
2856   (void)A;
2857   return PETSC_SUCCESS;
2858 #endif
2859 }
2860 
2861 /*
2862     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2863 */
2864 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2865 {
2866   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
2867   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
2868   PetscInt     nz = a->i[m], row, *jj, mr, col;
2869 
2870   PetscFunctionBegin;
2871   *nn = n;
2872   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2873   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
2874   PetscCall(PetscCalloc1(n, &collengths));
2875   PetscCall(PetscMalloc1(n + 1, &cia));
2876   PetscCall(PetscMalloc1(nz, &cja));
2877   jj = a->j;
2878   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2879   cia[0] = oshift;
2880   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2881   PetscCall(PetscArrayzero(collengths, n));
2882   jj = a->j;
2883   for (row = 0; row < m; row++) {
2884     mr = a->i[row + 1] - a->i[row];
2885     for (i = 0; i < mr; i++) {
2886       col = *jj++;
2887 
2888       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2889     }
2890   }
2891   PetscCall(PetscFree(collengths));
2892   *ia = cia;
2893   *ja = cja;
2894   PetscFunctionReturn(PETSC_SUCCESS);
2895 }
2896 
2897 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2898 {
2899   PetscFunctionBegin;
2900   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2901   PetscCall(PetscFree(*ia));
2902   PetscCall(PetscFree(*ja));
2903   PetscFunctionReturn(PETSC_SUCCESS);
2904 }
2905 
2906 /*
2907  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2908  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2909  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2910  */
2911 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2912 {
2913   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2914   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2915   PetscInt     nz = a->i[m], row, *jj, mr, col;
2916   PetscInt    *cspidx;
2917 
2918   PetscFunctionBegin;
2919   *nn = n;
2920   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2921 
2922   PetscCall(PetscCalloc1(n, &collengths));
2923   PetscCall(PetscMalloc1(n + 1, &cia));
2924   PetscCall(PetscMalloc1(nz, &cja));
2925   PetscCall(PetscMalloc1(nz, &cspidx));
2926   jj = a->j;
2927   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2928   cia[0] = oshift;
2929   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2930   PetscCall(PetscArrayzero(collengths, n));
2931   jj = a->j;
2932   for (row = 0; row < m; row++) {
2933     mr = a->i[row + 1] - a->i[row];
2934     for (i = 0; i < mr; i++) {
2935       col                                         = *jj++;
2936       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2937       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2938     }
2939   }
2940   PetscCall(PetscFree(collengths));
2941   *ia    = cia;
2942   *ja    = cja;
2943   *spidx = cspidx;
2944   PetscFunctionReturn(PETSC_SUCCESS);
2945 }
2946 
2947 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2948 {
2949   PetscFunctionBegin;
2950   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
2951   PetscCall(PetscFree(*spidx));
2952   PetscFunctionReturn(PETSC_SUCCESS);
2953 }
2954 
2955 static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2956 {
2957   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
2958 
2959   PetscFunctionBegin;
2960   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
2961   PetscCall(MatShift_Basic(Y, a));
2962   PetscFunctionReturn(PETSC_SUCCESS);
2963 }
2964 
2965 PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep)
2966 {
2967   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2968   PetscInt     fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k;
2969   PetscInt     m = A->rmap->N, *ailen = a->ilen;
2970   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2971   MatScalar   *aa = a->a, *ap;
2972   PetscBool    zero;
2973 
2974   PetscFunctionBegin;
2975   PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix");
2976   if (m) rmax = ailen[0];
2977   for (i = 1; i <= mbs; i++) {
2978     for (k = ai[i - 1]; k < ai[i]; k++) {
2979       zero = PETSC_TRUE;
2980       ap   = aa + bs2 * k;
2981       for (j = 0; j < bs2 && zero; j++) {
2982         if (ap[j] != 0.0) zero = PETSC_FALSE;
2983       }
2984       if (zero && (aj[k] != i - 1 || !keep)) fshift++;
2985       else {
2986         if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1));
2987         aj[k - fshift] = aj[k];
2988         PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2));
2989       }
2990     }
2991     ai[i - 1] -= fshift_prev;
2992     fshift_prev  = fshift;
2993     ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1];
2994     a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0);
2995     rmax = PetscMax(rmax, ailen[i - 1]);
2996   }
2997   if (fshift) {
2998     if (mbs) {
2999       ai[mbs] -= fshift;
3000       a->nz = ai[mbs];
3001     }
3002     PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz));
3003     A->nonzerostate++;
3004     A->info.nz_unneeded += (PetscReal)fshift;
3005     a->rmax = rmax;
3006     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
3007     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
3008   }
3009   PetscFunctionReturn(PETSC_SUCCESS);
3010 }
3011 
3012 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
3013                                        MatGetRow_SeqBAIJ,
3014                                        MatRestoreRow_SeqBAIJ,
3015                                        MatMult_SeqBAIJ_N,
3016                                        /* 4*/ MatMultAdd_SeqBAIJ_N,
3017                                        MatMultTranspose_SeqBAIJ,
3018                                        MatMultTransposeAdd_SeqBAIJ,
3019                                        NULL,
3020                                        NULL,
3021                                        NULL,
3022                                        /* 10*/ NULL,
3023                                        MatLUFactor_SeqBAIJ,
3024                                        NULL,
3025                                        NULL,
3026                                        MatTranspose_SeqBAIJ,
3027                                        /* 15*/ MatGetInfo_SeqBAIJ,
3028                                        MatEqual_SeqBAIJ,
3029                                        MatGetDiagonal_SeqBAIJ,
3030                                        MatDiagonalScale_SeqBAIJ,
3031                                        MatNorm_SeqBAIJ,
3032                                        /* 20*/ NULL,
3033                                        MatAssemblyEnd_SeqBAIJ,
3034                                        MatSetOption_SeqBAIJ,
3035                                        MatZeroEntries_SeqBAIJ,
3036                                        /* 24*/ MatZeroRows_SeqBAIJ,
3037                                        NULL,
3038                                        NULL,
3039                                        NULL,
3040                                        NULL,
3041                                        /* 29*/ MatSetUp_Seq_Hash,
3042                                        NULL,
3043                                        NULL,
3044                                        NULL,
3045                                        NULL,
3046                                        /* 34*/ MatDuplicate_SeqBAIJ,
3047                                        NULL,
3048                                        NULL,
3049                                        MatILUFactor_SeqBAIJ,
3050                                        NULL,
3051                                        /* 39*/ MatAXPY_SeqBAIJ,
3052                                        MatCreateSubMatrices_SeqBAIJ,
3053                                        MatIncreaseOverlap_SeqBAIJ,
3054                                        MatGetValues_SeqBAIJ,
3055                                        MatCopy_SeqBAIJ,
3056                                        /* 44*/ NULL,
3057                                        MatScale_SeqBAIJ,
3058                                        MatShift_SeqBAIJ,
3059                                        NULL,
3060                                        MatZeroRowsColumns_SeqBAIJ,
3061                                        /* 49*/ NULL,
3062                                        MatGetRowIJ_SeqBAIJ,
3063                                        MatRestoreRowIJ_SeqBAIJ,
3064                                        MatGetColumnIJ_SeqBAIJ,
3065                                        MatRestoreColumnIJ_SeqBAIJ,
3066                                        /* 54*/ MatFDColoringCreate_SeqXAIJ,
3067                                        NULL,
3068                                        NULL,
3069                                        NULL,
3070                                        MatSetValuesBlocked_SeqBAIJ,
3071                                        /* 59*/ MatCreateSubMatrix_SeqBAIJ,
3072                                        MatDestroy_SeqBAIJ,
3073                                        MatView_SeqBAIJ,
3074                                        NULL,
3075                                        NULL,
3076                                        /* 64*/ NULL,
3077                                        NULL,
3078                                        NULL,
3079                                        NULL,
3080                                        NULL,
3081                                        /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
3082                                        NULL,
3083                                        MatConvert_Basic,
3084                                        NULL,
3085                                        NULL,
3086                                        /* 74*/ NULL,
3087                                        MatFDColoringApply_BAIJ,
3088                                        NULL,
3089                                        NULL,
3090                                        NULL,
3091                                        /* 79*/ NULL,
3092                                        NULL,
3093                                        NULL,
3094                                        NULL,
3095                                        MatLoad_SeqBAIJ,
3096                                        /* 84*/ NULL,
3097                                        NULL,
3098                                        NULL,
3099                                        NULL,
3100                                        NULL,
3101                                        /* 89*/ NULL,
3102                                        NULL,
3103                                        NULL,
3104                                        NULL,
3105                                        NULL,
3106                                        /* 94*/ NULL,
3107                                        NULL,
3108                                        NULL,
3109                                        NULL,
3110                                        NULL,
3111                                        /* 99*/ NULL,
3112                                        NULL,
3113                                        NULL,
3114                                        MatConjugate_SeqBAIJ,
3115                                        NULL,
3116                                        /*104*/ NULL,
3117                                        MatRealPart_SeqBAIJ,
3118                                        MatImaginaryPart_SeqBAIJ,
3119                                        NULL,
3120                                        NULL,
3121                                        /*109*/ NULL,
3122                                        NULL,
3123                                        NULL,
3124                                        NULL,
3125                                        MatMissingDiagonal_SeqBAIJ,
3126                                        /*114*/ NULL,
3127                                        NULL,
3128                                        NULL,
3129                                        NULL,
3130                                        NULL,
3131                                        /*119*/ NULL,
3132                                        NULL,
3133                                        MatMultHermitianTranspose_SeqBAIJ,
3134                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3135                                        NULL,
3136                                        /*124*/ NULL,
3137                                        MatGetColumnReductions_SeqBAIJ,
3138                                        MatInvertBlockDiagonal_SeqBAIJ,
3139                                        NULL,
3140                                        NULL,
3141                                        /*129*/ NULL,
3142                                        NULL,
3143                                        NULL,
3144                                        NULL,
3145                                        NULL,
3146                                        /*134*/ NULL,
3147                                        NULL,
3148                                        NULL,
3149                                        NULL,
3150                                        NULL,
3151                                        /*139*/ MatSetBlockSizes_Default,
3152                                        NULL,
3153                                        NULL,
3154                                        MatFDColoringSetUp_SeqXAIJ,
3155                                        NULL,
3156                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3157                                        MatDestroySubMatrices_SeqBAIJ,
3158                                        NULL,
3159                                        NULL,
3160                                        NULL,
3161                                        NULL,
3162                                        /*150*/ NULL,
3163                                        MatEliminateZeros_SeqBAIJ,
3164                                        MatGetRowSumAbs_SeqBAIJ,
3165                                        NULL,
3166                                        NULL,
3167                                        NULL};
3168 
3169 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3170 {
3171   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3172   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3173 
3174   PetscFunctionBegin;
3175   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3176 
3177   /* allocate space for values if not already there */
3178   if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
3179 
3180   /* copy values over */
3181   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
3182   PetscFunctionReturn(PETSC_SUCCESS);
3183 }
3184 
3185 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3186 {
3187   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3188   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3189 
3190   PetscFunctionBegin;
3191   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3192   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
3193 
3194   /* copy values over */
3195   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
3196   PetscFunctionReturn(PETSC_SUCCESS);
3197 }
3198 
3199 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3200 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3201 
3202 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3203 {
3204   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
3205   PetscInt     i, mbs, nbs, bs2;
3206   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3207 
3208   PetscFunctionBegin;
3209   if (B->hash_active) {
3210     PetscInt bs;
3211     B->ops[0] = b->cops;
3212     PetscCall(PetscHMapIJVDestroy(&b->ht));
3213     PetscCall(MatGetBlockSize(B, &bs));
3214     if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht));
3215     PetscCall(PetscFree(b->dnz));
3216     PetscCall(PetscFree(b->bdnz));
3217     B->hash_active = PETSC_FALSE;
3218   }
3219   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3220   if (nz == MAT_SKIP_ALLOCATION) {
3221     skipallocation = PETSC_TRUE;
3222     nz             = 0;
3223   }
3224 
3225   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
3226   PetscCall(PetscLayoutSetUp(B->rmap));
3227   PetscCall(PetscLayoutSetUp(B->cmap));
3228   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3229 
3230   B->preallocated = PETSC_TRUE;
3231 
3232   mbs = B->rmap->n / bs;
3233   nbs = B->cmap->n / bs;
3234   bs2 = bs * bs;
3235 
3236   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3237 
3238   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3239   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3240   if (nnz) {
3241     for (i = 0; i < mbs; i++) {
3242       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
3243       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3244     }
3245   }
3246 
3247   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
3248   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3249   PetscOptionsEnd();
3250 
3251   if (!flg) {
3252     switch (bs) {
3253     case 1:
3254       B->ops->mult    = MatMult_SeqBAIJ_1;
3255       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3256       break;
3257     case 2:
3258       B->ops->mult    = MatMult_SeqBAIJ_2;
3259       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3260       break;
3261     case 3:
3262       B->ops->mult    = MatMult_SeqBAIJ_3;
3263       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3264       break;
3265     case 4:
3266       B->ops->mult    = MatMult_SeqBAIJ_4;
3267       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3268       break;
3269     case 5:
3270       B->ops->mult    = MatMult_SeqBAIJ_5;
3271       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3272       break;
3273     case 6:
3274       B->ops->mult    = MatMult_SeqBAIJ_6;
3275       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3276       break;
3277     case 7:
3278       B->ops->mult    = MatMult_SeqBAIJ_7;
3279       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3280       break;
3281     case 9: {
3282       PetscInt version = 1;
3283       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3284       switch (version) {
3285 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3286       case 1:
3287         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
3288         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3289         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3290         break;
3291 #endif
3292       default:
3293         B->ops->mult    = MatMult_SeqBAIJ_N;
3294         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3295         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3296         break;
3297       }
3298       break;
3299     }
3300     case 11:
3301       B->ops->mult    = MatMult_SeqBAIJ_11;
3302       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3303       break;
3304     case 12: {
3305       PetscInt version = 1;
3306       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3307       switch (version) {
3308       case 1:
3309         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
3310         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3311         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3312         break;
3313       case 2:
3314         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
3315         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3316         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3317         break;
3318 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3319       case 3:
3320         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
3321         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3322         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3323         break;
3324 #endif
3325       default:
3326         B->ops->mult    = MatMult_SeqBAIJ_N;
3327         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3328         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3329         break;
3330       }
3331       break;
3332     }
3333     case 15: {
3334       PetscInt version = 1;
3335       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3336       switch (version) {
3337       case 1:
3338         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3339         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3340         break;
3341       case 2:
3342         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3343         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3344         break;
3345       case 3:
3346         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3347         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3348         break;
3349       case 4:
3350         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3351         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3352         break;
3353       default:
3354         B->ops->mult = MatMult_SeqBAIJ_N;
3355         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3356         break;
3357       }
3358       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3359       break;
3360     }
3361     default:
3362       B->ops->mult    = MatMult_SeqBAIJ_N;
3363       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3364       PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3365       break;
3366     }
3367   }
3368   B->ops->sor = MatSOR_SeqBAIJ;
3369   b->mbs      = mbs;
3370   b->nbs      = nbs;
3371   if (!skipallocation) {
3372     if (!b->imax) {
3373       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
3374 
3375       b->free_imax_ilen = PETSC_TRUE;
3376     }
3377     /* b->ilen will count nonzeros in each block row so far. */
3378     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3379     if (!nnz) {
3380       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3381       else if (nz < 0) nz = 1;
3382       nz = PetscMin(nz, nbs);
3383       for (i = 0; i < mbs; i++) b->imax[i] = nz;
3384       PetscCall(PetscIntMultError(nz, mbs, &nz));
3385     } else {
3386       PetscInt64 nz64 = 0;
3387       for (i = 0; i < mbs; i++) {
3388         b->imax[i] = nnz[i];
3389         nz64 += nnz[i];
3390       }
3391       PetscCall(PetscIntCast(nz64, &nz));
3392     }
3393 
3394     /* allocate the matrix space */
3395     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3396     PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j));
3397     PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i));
3398     if (B->structure_only) {
3399       b->free_a = PETSC_FALSE;
3400     } else {
3401       PetscInt nzbs2 = 0;
3402       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
3403       PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a));
3404       b->free_a = PETSC_TRUE;
3405       PetscCall(PetscArrayzero(b->a, nz * bs2));
3406     }
3407     b->free_ij = PETSC_TRUE;
3408     PetscCall(PetscArrayzero(b->j, nz));
3409 
3410     b->i[0] = 0;
3411     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3412   } else {
3413     b->free_a  = PETSC_FALSE;
3414     b->free_ij = PETSC_FALSE;
3415   }
3416 
3417   b->bs2              = bs2;
3418   b->mbs              = mbs;
3419   b->nz               = 0;
3420   b->maxnz            = nz;
3421   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3422   B->was_assembled    = PETSC_FALSE;
3423   B->assembled        = PETSC_FALSE;
3424   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3425   PetscFunctionReturn(PETSC_SUCCESS);
3426 }
3427 
3428 static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3429 {
3430   PetscInt     i, m, nz, nz_max = 0, *nnz;
3431   PetscScalar *values      = NULL;
3432   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3433 
3434   PetscFunctionBegin;
3435   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
3436   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
3437   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
3438   PetscCall(PetscLayoutSetUp(B->rmap));
3439   PetscCall(PetscLayoutSetUp(B->cmap));
3440   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3441   m = B->rmap->n / bs;
3442 
3443   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
3444   PetscCall(PetscMalloc1(m + 1, &nnz));
3445   for (i = 0; i < m; i++) {
3446     nz = ii[i + 1] - ii[i];
3447     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3448     nz_max = PetscMax(nz_max, nz);
3449     nnz[i] = nz;
3450   }
3451   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
3452   PetscCall(PetscFree(nnz));
3453 
3454   values = (PetscScalar *)V;
3455   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3456   for (i = 0; i < m; i++) {
3457     PetscInt        ncols = ii[i + 1] - ii[i];
3458     const PetscInt *icols = jj + ii[i];
3459     if (bs == 1 || !roworiented) {
3460       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
3461       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
3462     } else {
3463       PetscInt j;
3464       for (j = 0; j < ncols; j++) {
3465         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
3466         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
3467       }
3468     }
3469   }
3470   if (!V) PetscCall(PetscFree(values));
3471   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3472   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3473   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3474   PetscFunctionReturn(PETSC_SUCCESS);
3475 }
3476 
3477 /*@C
3478   MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3479 
3480   Not Collective
3481 
3482   Input Parameter:
3483 . A - a `MATSEQBAIJ` matrix
3484 
3485   Output Parameter:
3486 . array - pointer to the data
3487 
3488   Level: intermediate
3489 
3490 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3491 @*/
3492 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[])
3493 {
3494   PetscFunctionBegin;
3495   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
3496   PetscFunctionReturn(PETSC_SUCCESS);
3497 }
3498 
3499 /*@C
3500   MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3501 
3502   Not Collective
3503 
3504   Input Parameters:
3505 + A     - a `MATSEQBAIJ` matrix
3506 - array - pointer to the data
3507 
3508   Level: intermediate
3509 
3510 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3511 @*/
3512 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[])
3513 {
3514   PetscFunctionBegin;
3515   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
3516   PetscFunctionReturn(PETSC_SUCCESS);
3517 }
3518 
3519 /*MC
3520    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3521    block sparse compressed row format.
3522 
3523    Options Database Keys:
3524 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()`
3525 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3526 
3527    Level: beginner
3528 
3529    Notes:
3530    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
3531    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
3532 
3533    Run with `-info` to see what version of the matrix-vector product is being used
3534 
3535 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()`
3536 M*/
3537 
3538 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3539 
3540 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3541 {
3542   PetscMPIInt  size;
3543   Mat_SeqBAIJ *b;
3544 
3545   PetscFunctionBegin;
3546   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
3547   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3548 
3549   PetscCall(PetscNew(&b));
3550   B->data   = (void *)b;
3551   B->ops[0] = MatOps_Values;
3552 
3553   b->row          = NULL;
3554   b->col          = NULL;
3555   b->icol         = NULL;
3556   b->reallocs     = 0;
3557   b->saved_values = NULL;
3558 
3559   b->roworiented        = PETSC_TRUE;
3560   b->nonew              = 0;
3561   b->diag               = NULL;
3562   B->spptr              = NULL;
3563   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3564   b->keepnonzeropattern = PETSC_FALSE;
3565 
3566   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
3567   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
3568   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
3569   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
3570   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
3571   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
3572   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
3573   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
3574   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3575   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
3576 #if defined(PETSC_HAVE_HYPRE)
3577   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
3578 #endif
3579   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
3580   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
3581   PetscFunctionReturn(PETSC_SUCCESS);
3582 }
3583 
3584 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3585 {
3586   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3587   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3588 
3589   PetscFunctionBegin;
3590   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
3591   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
3592 
3593   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3594     c->imax           = a->imax;
3595     c->ilen           = a->ilen;
3596     c->free_imax_ilen = PETSC_FALSE;
3597   } else {
3598     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3599     for (i = 0; i < mbs; i++) {
3600       c->imax[i] = a->imax[i];
3601       c->ilen[i] = a->ilen[i];
3602     }
3603     c->free_imax_ilen = PETSC_TRUE;
3604   }
3605 
3606   /* allocate the matrix space */
3607   if (mallocmatspace) {
3608     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3609       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3610       PetscCall(PetscArrayzero(c->a, bs2 * nz));
3611       c->free_a       = PETSC_TRUE;
3612       c->i            = a->i;
3613       c->j            = a->j;
3614       c->free_ij      = PETSC_FALSE;
3615       c->parent       = A;
3616       C->preallocated = PETSC_TRUE;
3617       C->assembled    = PETSC_TRUE;
3618 
3619       PetscCall(PetscObjectReference((PetscObject)A));
3620       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3621       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3622     } else {
3623       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3624       PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j));
3625       PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i));
3626       c->free_a  = PETSC_TRUE;
3627       c->free_ij = PETSC_TRUE;
3628 
3629       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3630       if (mbs > 0) {
3631         PetscCall(PetscArraycpy(c->j, a->j, nz));
3632         if (cpvalues == MAT_COPY_VALUES) {
3633           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
3634         } else {
3635           PetscCall(PetscArrayzero(c->a, bs2 * nz));
3636         }
3637       }
3638       C->preallocated = PETSC_TRUE;
3639       C->assembled    = PETSC_TRUE;
3640     }
3641   }
3642 
3643   c->roworiented = a->roworiented;
3644   c->nonew       = a->nonew;
3645 
3646   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
3647   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
3648 
3649   c->bs2 = a->bs2;
3650   c->mbs = a->mbs;
3651   c->nbs = a->nbs;
3652 
3653   if (a->diag) {
3654     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3655       c->diag      = a->diag;
3656       c->free_diag = PETSC_FALSE;
3657     } else {
3658       PetscCall(PetscMalloc1(mbs + 1, &c->diag));
3659       for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i];
3660       c->free_diag = PETSC_TRUE;
3661     }
3662   } else c->diag = NULL;
3663 
3664   c->nz         = a->nz;
3665   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3666   c->solve_work = NULL;
3667   c->mult_work  = NULL;
3668   c->sor_workt  = NULL;
3669   c->sor_work   = NULL;
3670 
3671   c->compressedrow.use   = a->compressedrow.use;
3672   c->compressedrow.nrows = a->compressedrow.nrows;
3673   if (a->compressedrow.use) {
3674     i = a->compressedrow.nrows;
3675     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
3676     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
3677     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
3678   } else {
3679     c->compressedrow.use    = PETSC_FALSE;
3680     c->compressedrow.i      = NULL;
3681     c->compressedrow.rindex = NULL;
3682   }
3683   c->nonzerorowcnt = a->nonzerorowcnt;
3684   C->nonzerostate  = A->nonzerostate;
3685 
3686   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
3687   PetscFunctionReturn(PETSC_SUCCESS);
3688 }
3689 
3690 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3691 {
3692   PetscFunctionBegin;
3693   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
3694   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
3695   PetscCall(MatSetType(*B, MATSEQBAIJ));
3696   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
3697   PetscFunctionReturn(PETSC_SUCCESS);
3698 }
3699 
3700 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3701 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3702 {
3703   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3704   PetscInt    *rowidxs, *colidxs;
3705   PetscScalar *matvals;
3706 
3707   PetscFunctionBegin;
3708   PetscCall(PetscViewerSetUp(viewer));
3709 
3710   /* read matrix header */
3711   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3712   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3713   M  = header[1];
3714   N  = header[2];
3715   nz = header[3];
3716   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3717   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3718   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3719 
3720   /* set block sizes from the viewer's .info file */
3721   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3722   /* set local and global sizes if not set already */
3723   if (mat->rmap->n < 0) mat->rmap->n = M;
3724   if (mat->cmap->n < 0) mat->cmap->n = N;
3725   if (mat->rmap->N < 0) mat->rmap->N = M;
3726   if (mat->cmap->N < 0) mat->cmap->N = N;
3727   PetscCall(PetscLayoutSetUp(mat->rmap));
3728   PetscCall(PetscLayoutSetUp(mat->cmap));
3729 
3730   /* check if the matrix sizes are correct */
3731   PetscCall(MatGetSize(mat, &rows, &cols));
3732   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3733   PetscCall(MatGetBlockSize(mat, &bs));
3734   PetscCall(MatGetLocalSize(mat, &m, &n));
3735   mbs = m / bs;
3736   nbs = n / bs;
3737 
3738   /* read in row lengths, column indices and nonzero values */
3739   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3740   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
3741   rowidxs[0] = 0;
3742   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3743   sum = rowidxs[m];
3744   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3745 
3746   /* read in column indices and nonzero values */
3747   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
3748   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
3749   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3750 
3751   {               /* preallocate matrix storage */
3752     PetscBT   bt; /* helper bit set to count nonzeros */
3753     PetscInt *nnz;
3754     PetscBool sbaij;
3755 
3756     PetscCall(PetscBTCreate(nbs, &bt));
3757     PetscCall(PetscCalloc1(mbs, &nnz));
3758     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3759     for (i = 0; i < mbs; i++) {
3760       PetscCall(PetscBTMemzero(nbs, bt));
3761       for (k = 0; k < bs; k++) {
3762         PetscInt row = bs * i + k;
3763         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3764           PetscInt col = colidxs[j];
3765           if (!sbaij || col >= row)
3766             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3767         }
3768       }
3769     }
3770     PetscCall(PetscBTDestroy(&bt));
3771     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
3772     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
3773     PetscCall(PetscFree(nnz));
3774   }
3775 
3776   /* store matrix values */
3777   for (i = 0; i < m; i++) {
3778     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
3779     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3780   }
3781 
3782   PetscCall(PetscFree(rowidxs));
3783   PetscCall(PetscFree2(colidxs, matvals));
3784   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3785   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3786   PetscFunctionReturn(PETSC_SUCCESS);
3787 }
3788 
3789 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3790 {
3791   PetscBool isbinary;
3792 
3793   PetscFunctionBegin;
3794   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3795   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3796   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
3797   PetscFunctionReturn(PETSC_SUCCESS);
3798 }
3799 
3800 /*@
3801   MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3802   compressed row) format.  For good matrix assembly performance the
3803   user should preallocate the matrix storage by setting the parameter `nz`
3804   (or the array `nnz`).
3805 
3806   Collective
3807 
3808   Input Parameters:
3809 + comm - MPI communicator, set to `PETSC_COMM_SELF`
3810 . bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3811          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3812 . m    - number of rows
3813 . n    - number of columns
3814 . nz   - number of nonzero blocks  per block row (same for all rows)
3815 - nnz  - array containing the number of nonzero blocks in the various block rows
3816          (possibly different for each block row) or `NULL`
3817 
3818   Output Parameter:
3819 . A - the matrix
3820 
3821   Options Database Keys:
3822 + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3823 - -mat_block_size - size of the blocks to use
3824 
3825   Level: intermediate
3826 
3827   Notes:
3828   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3829   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3830   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
3831 
3832   The number of rows and columns must be divisible by blocksize.
3833 
3834   If the `nnz` parameter is given then the `nz` parameter is ignored
3835 
3836   A nonzero block is any block that as 1 or more nonzeros in it
3837 
3838   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3839   storage.  That is, the stored row and column indices can begin at
3840   either one (as in Fortran) or zero.
3841 
3842   Specify the preallocated storage with either `nz` or `nnz` (not both).
3843   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3844   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3845   matrices.
3846 
3847 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3848 @*/
3849 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3850 {
3851   PetscFunctionBegin;
3852   PetscCall(MatCreate(comm, A));
3853   PetscCall(MatSetSizes(*A, m, n, m, n));
3854   PetscCall(MatSetType(*A, MATSEQBAIJ));
3855   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
3856   PetscFunctionReturn(PETSC_SUCCESS);
3857 }
3858 
3859 /*@
3860   MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3861   per row in the matrix. For good matrix assembly performance the
3862   user should preallocate the matrix storage by setting the parameter `nz`
3863   (or the array `nnz`).
3864 
3865   Collective
3866 
3867   Input Parameters:
3868 + B   - the matrix
3869 . bs  - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3870         blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3871 . nz  - number of block nonzeros per block row (same for all rows)
3872 - nnz - array containing the number of block nonzeros in the various block rows
3873         (possibly different for each block row) or `NULL`
3874 
3875   Options Database Keys:
3876 + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3877 - -mat_block_size - size of the blocks to use
3878 
3879   Level: intermediate
3880 
3881   Notes:
3882   If the `nnz` parameter is given then the `nz` parameter is ignored
3883 
3884   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3885   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3886   You can also run with the option `-info` and look for messages with the string
3887   malloc in them to see if additional memory allocation was needed.
3888 
3889   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3890   storage.  That is, the stored row and column indices can begin at
3891   either one (as in Fortran) or zero.
3892 
3893   Specify the preallocated storage with either `nz` or `nnz` (not both).
3894   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3895   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3896 
3897 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3898 @*/
3899 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3900 {
3901   PetscFunctionBegin;
3902   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3903   PetscValidType(B, 1);
3904   PetscValidLogicalCollectiveInt(B, bs, 2);
3905   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
3906   PetscFunctionReturn(PETSC_SUCCESS);
3907 }
3908 
3909 /*@C
3910   MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3911 
3912   Collective
3913 
3914   Input Parameters:
3915 + B  - the matrix
3916 . bs - the blocksize
3917 . i  - the indices into `j` for the start of each local row (indices start with zero)
3918 . j  - the column indices for each local row (indices start with zero) these must be sorted for each row
3919 - v  - optional values in the matrix, use `NULL` if not provided
3920 
3921   Level: advanced
3922 
3923   Notes:
3924   The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()`
3925 
3926   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
3927   may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
3928   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3929   `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3930   block column and the second index is over columns within a block.
3931 
3932   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3933 
3934 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3935 @*/
3936 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3937 {
3938   PetscFunctionBegin;
3939   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3940   PetscValidType(B, 1);
3941   PetscValidLogicalCollectiveInt(B, bs, 2);
3942   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
3943   PetscFunctionReturn(PETSC_SUCCESS);
3944 }
3945 
3946 /*@
3947   MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3948 
3949   Collective
3950 
3951   Input Parameters:
3952 + comm - must be an MPI communicator of size 1
3953 . bs   - size of block
3954 . m    - number of rows
3955 . n    - number of columns
3956 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3957 . j    - column indices
3958 - a    - matrix values
3959 
3960   Output Parameter:
3961 . mat - the matrix
3962 
3963   Level: advanced
3964 
3965   Notes:
3966   The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
3967   once the matrix is destroyed
3968 
3969   You cannot set new nonzero locations into this matrix, that will generate an error.
3970 
3971   The `i` and `j` indices are 0 based
3972 
3973   When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3974 
3975   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3976   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3977   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3978   with column-major ordering within blocks.
3979 
3980 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3981 @*/
3982 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3983 {
3984   Mat_SeqBAIJ *baij;
3985 
3986   PetscFunctionBegin;
3987   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
3988   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3989 
3990   PetscCall(MatCreate(comm, mat));
3991   PetscCall(MatSetSizes(*mat, m, n, m, n));
3992   PetscCall(MatSetType(*mat, MATSEQBAIJ));
3993   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3994   baij = (Mat_SeqBAIJ *)(*mat)->data;
3995   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3996 
3997   baij->i = i;
3998   baij->j = j;
3999   baij->a = a;
4000 
4001   baij->nonew          = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
4002   baij->free_a         = PETSC_FALSE;
4003   baij->free_ij        = PETSC_FALSE;
4004   baij->free_imax_ilen = PETSC_TRUE;
4005 
4006   for (PetscInt ii = 0; ii < m; ii++) {
4007     const PetscInt row_len = i[ii + 1] - i[ii];
4008 
4009     baij->ilen[ii] = baij->imax[ii] = row_len;
4010     PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
4011   }
4012   if (PetscDefined(USE_DEBUG)) {
4013     for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
4014       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
4015       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
4016     }
4017   }
4018 
4019   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
4020   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
4021   PetscFunctionReturn(PETSC_SUCCESS);
4022 }
4023 
4024 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4025 {
4026   PetscFunctionBegin;
4027   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
4028   PetscFunctionReturn(PETSC_SUCCESS);
4029 }
4030