xref: /petsc/src/mat/impls/baij/seq/baij.c (revision a4e35b1925eceef64945ea472b84f2bf06a67b5e)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */
12 #define TYPE BAIJ
13 #define TYPE_BS
14 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
15 #undef TYPE_BS
16 #define TYPE_BS _BS
17 #define TYPE_BS_ON
18 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
19 #undef TYPE_BS
20 #include "../src/mat/impls/aij/seq/seqhashmat.h"
21 #undef TYPE
22 #undef TYPE_BS_ON
23 
24 #if defined(PETSC_HAVE_HYPRE)
25 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
26 #endif
27 
28 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
29 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
30 #endif
31 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
32 
33 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
34 {
35   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
36   PetscInt     m, n, ib, jb, bs = A->rmap->bs;
37   MatScalar   *a_val = a_aij->a;
38 
39   PetscFunctionBegin;
40   PetscCall(MatGetSize(A, &m, &n));
41   PetscCall(PetscArrayzero(reductions, n));
42   if (type == NORM_2) {
43     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
44       for (jb = 0; jb < bs; jb++) {
45         for (ib = 0; ib < bs; ib++) {
46           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
47           a_val++;
48         }
49       }
50     }
51   } else if (type == NORM_1) {
52     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
53       for (jb = 0; jb < bs; jb++) {
54         for (ib = 0; ib < bs; ib++) {
55           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
56           a_val++;
57         }
58       }
59     }
60   } else if (type == NORM_INFINITY) {
61     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
62       for (jb = 0; jb < bs; jb++) {
63         for (ib = 0; ib < bs; ib++) {
64           int col         = A->cmap->rstart + a_aij->j[i] * bs + jb;
65           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
66           a_val++;
67         }
68       }
69     }
70   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
71     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
72       for (jb = 0; jb < bs; jb++) {
73         for (ib = 0; ib < bs; ib++) {
74           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
75           a_val++;
76         }
77       }
78     }
79   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
80     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
81       for (jb = 0; jb < bs; jb++) {
82         for (ib = 0; ib < bs; ib++) {
83           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
84           a_val++;
85         }
86       }
87     }
88   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
89   if (type == NORM_2) {
90     for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
91   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
92     for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
93   }
94   PetscFunctionReturn(PETSC_SUCCESS);
95 }
96 
97 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
98 {
99   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
100   PetscInt    *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
101   MatScalar   *v     = a->a, *odiag, *diag, work[25], *v_work;
102   PetscReal    shift = 0.0;
103   PetscBool    allowzeropivot, zeropivotdetected = PETSC_FALSE;
104 
105   PetscFunctionBegin;
106   allowzeropivot = PetscNot(A->erroriffailure);
107 
108   if (a->idiagvalid) {
109     if (values) *values = a->idiag;
110     PetscFunctionReturn(PETSC_SUCCESS);
111   }
112   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
113   diag_offset = a->diag;
114   if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); }
115   diag = a->idiag;
116   if (values) *values = a->idiag;
117   /* factor and invert each block */
118   switch (bs) {
119   case 1:
120     for (i = 0; i < mbs; i++) {
121       odiag   = v + 1 * diag_offset[i];
122       diag[0] = odiag[0];
123 
124       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
125         if (allowzeropivot) {
126           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
127           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
128           A->factorerror_zeropivot_row   = i;
129           PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
130         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
131       }
132 
133       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
134       diag += 1;
135     }
136     break;
137   case 2:
138     for (i = 0; i < mbs; i++) {
139       odiag   = v + 4 * diag_offset[i];
140       diag[0] = odiag[0];
141       diag[1] = odiag[1];
142       diag[2] = odiag[2];
143       diag[3] = odiag[3];
144       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
145       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
146       diag += 4;
147     }
148     break;
149   case 3:
150     for (i = 0; i < mbs; i++) {
151       odiag   = v + 9 * diag_offset[i];
152       diag[0] = odiag[0];
153       diag[1] = odiag[1];
154       diag[2] = odiag[2];
155       diag[3] = odiag[3];
156       diag[4] = odiag[4];
157       diag[5] = odiag[5];
158       diag[6] = odiag[6];
159       diag[7] = odiag[7];
160       diag[8] = odiag[8];
161       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
162       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
163       diag += 9;
164     }
165     break;
166   case 4:
167     for (i = 0; i < mbs; i++) {
168       odiag = v + 16 * diag_offset[i];
169       PetscCall(PetscArraycpy(diag, odiag, 16));
170       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
171       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
172       diag += 16;
173     }
174     break;
175   case 5:
176     for (i = 0; i < mbs; i++) {
177       odiag = v + 25 * diag_offset[i];
178       PetscCall(PetscArraycpy(diag, odiag, 25));
179       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
180       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
181       diag += 25;
182     }
183     break;
184   case 6:
185     for (i = 0; i < mbs; i++) {
186       odiag = v + 36 * diag_offset[i];
187       PetscCall(PetscArraycpy(diag, odiag, 36));
188       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
189       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
190       diag += 36;
191     }
192     break;
193   case 7:
194     for (i = 0; i < mbs; i++) {
195       odiag = v + 49 * diag_offset[i];
196       PetscCall(PetscArraycpy(diag, odiag, 49));
197       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
198       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
199       diag += 49;
200     }
201     break;
202   default:
203     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
204     for (i = 0; i < mbs; i++) {
205       odiag = v + bs2 * diag_offset[i];
206       PetscCall(PetscArraycpy(diag, odiag, bs2));
207       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
208       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
209       diag += bs2;
210     }
211     PetscCall(PetscFree2(v_work, v_pivots));
212   }
213   a->idiagvalid = PETSC_TRUE;
214   PetscFunctionReturn(PETSC_SUCCESS);
215 }
216 
217 PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
218 {
219   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
220   PetscScalar       *x, *work, *w, *workt, *t;
221   const MatScalar   *v, *aa = a->a, *idiag;
222   const PetscScalar *b, *xb;
223   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
224   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
225   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
226 
227   PetscFunctionBegin;
228   its = its * lits;
229   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
230   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
231   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
232   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
233   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
234 
235   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
236 
237   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
238   diag  = a->diag;
239   idiag = a->idiag;
240   k     = PetscMax(A->rmap->n, A->cmap->n);
241   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
242   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
243   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
244   work = a->mult_work;
245   t    = a->sor_workt;
246   w    = a->sor_work;
247 
248   PetscCall(VecGetArray(xx, &x));
249   PetscCall(VecGetArrayRead(bb, &b));
250 
251   if (flag & SOR_ZERO_INITIAL_GUESS) {
252     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
253       switch (bs) {
254       case 1:
255         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
256         t[0] = b[0];
257         i2   = 1;
258         idiag += 1;
259         for (i = 1; i < m; i++) {
260           v    = aa + ai[i];
261           vi   = aj + ai[i];
262           nz   = diag[i] - ai[i];
263           s[0] = b[i2];
264           for (j = 0; j < nz; j++) {
265             xw[0] = x[vi[j]];
266             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
267           }
268           t[i2] = s[0];
269           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
270           x[i2] = xw[0];
271           idiag += 1;
272           i2 += 1;
273         }
274         break;
275       case 2:
276         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
277         t[0] = b[0];
278         t[1] = b[1];
279         i2   = 2;
280         idiag += 4;
281         for (i = 1; i < m; i++) {
282           v    = aa + 4 * ai[i];
283           vi   = aj + ai[i];
284           nz   = diag[i] - ai[i];
285           s[0] = b[i2];
286           s[1] = b[i2 + 1];
287           for (j = 0; j < nz; j++) {
288             idx   = 2 * vi[j];
289             it    = 4 * j;
290             xw[0] = x[idx];
291             xw[1] = x[1 + idx];
292             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
293           }
294           t[i2]     = s[0];
295           t[i2 + 1] = s[1];
296           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
297           x[i2]     = xw[0];
298           x[i2 + 1] = xw[1];
299           idiag += 4;
300           i2 += 2;
301         }
302         break;
303       case 3:
304         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
305         t[0] = b[0];
306         t[1] = b[1];
307         t[2] = b[2];
308         i2   = 3;
309         idiag += 9;
310         for (i = 1; i < m; i++) {
311           v    = aa + 9 * ai[i];
312           vi   = aj + ai[i];
313           nz   = diag[i] - ai[i];
314           s[0] = b[i2];
315           s[1] = b[i2 + 1];
316           s[2] = b[i2 + 2];
317           while (nz--) {
318             idx   = 3 * (*vi++);
319             xw[0] = x[idx];
320             xw[1] = x[1 + idx];
321             xw[2] = x[2 + idx];
322             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
323             v += 9;
324           }
325           t[i2]     = s[0];
326           t[i2 + 1] = s[1];
327           t[i2 + 2] = s[2];
328           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
329           x[i2]     = xw[0];
330           x[i2 + 1] = xw[1];
331           x[i2 + 2] = xw[2];
332           idiag += 9;
333           i2 += 3;
334         }
335         break;
336       case 4:
337         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
338         t[0] = b[0];
339         t[1] = b[1];
340         t[2] = b[2];
341         t[3] = b[3];
342         i2   = 4;
343         idiag += 16;
344         for (i = 1; i < m; i++) {
345           v    = aa + 16 * ai[i];
346           vi   = aj + ai[i];
347           nz   = diag[i] - ai[i];
348           s[0] = b[i2];
349           s[1] = b[i2 + 1];
350           s[2] = b[i2 + 2];
351           s[3] = b[i2 + 3];
352           while (nz--) {
353             idx   = 4 * (*vi++);
354             xw[0] = x[idx];
355             xw[1] = x[1 + idx];
356             xw[2] = x[2 + idx];
357             xw[3] = x[3 + idx];
358             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
359             v += 16;
360           }
361           t[i2]     = s[0];
362           t[i2 + 1] = s[1];
363           t[i2 + 2] = s[2];
364           t[i2 + 3] = s[3];
365           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
366           x[i2]     = xw[0];
367           x[i2 + 1] = xw[1];
368           x[i2 + 2] = xw[2];
369           x[i2 + 3] = xw[3];
370           idiag += 16;
371           i2 += 4;
372         }
373         break;
374       case 5:
375         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
376         t[0] = b[0];
377         t[1] = b[1];
378         t[2] = b[2];
379         t[3] = b[3];
380         t[4] = b[4];
381         i2   = 5;
382         idiag += 25;
383         for (i = 1; i < m; i++) {
384           v    = aa + 25 * ai[i];
385           vi   = aj + ai[i];
386           nz   = diag[i] - ai[i];
387           s[0] = b[i2];
388           s[1] = b[i2 + 1];
389           s[2] = b[i2 + 2];
390           s[3] = b[i2 + 3];
391           s[4] = b[i2 + 4];
392           while (nz--) {
393             idx   = 5 * (*vi++);
394             xw[0] = x[idx];
395             xw[1] = x[1 + idx];
396             xw[2] = x[2 + idx];
397             xw[3] = x[3 + idx];
398             xw[4] = x[4 + idx];
399             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
400             v += 25;
401           }
402           t[i2]     = s[0];
403           t[i2 + 1] = s[1];
404           t[i2 + 2] = s[2];
405           t[i2 + 3] = s[3];
406           t[i2 + 4] = s[4];
407           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
408           x[i2]     = xw[0];
409           x[i2 + 1] = xw[1];
410           x[i2 + 2] = xw[2];
411           x[i2 + 3] = xw[3];
412           x[i2 + 4] = xw[4];
413           idiag += 25;
414           i2 += 5;
415         }
416         break;
417       case 6:
418         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
419         t[0] = b[0];
420         t[1] = b[1];
421         t[2] = b[2];
422         t[3] = b[3];
423         t[4] = b[4];
424         t[5] = b[5];
425         i2   = 6;
426         idiag += 36;
427         for (i = 1; i < m; i++) {
428           v    = aa + 36 * ai[i];
429           vi   = aj + ai[i];
430           nz   = diag[i] - ai[i];
431           s[0] = b[i2];
432           s[1] = b[i2 + 1];
433           s[2] = b[i2 + 2];
434           s[3] = b[i2 + 3];
435           s[4] = b[i2 + 4];
436           s[5] = b[i2 + 5];
437           while (nz--) {
438             idx   = 6 * (*vi++);
439             xw[0] = x[idx];
440             xw[1] = x[1 + idx];
441             xw[2] = x[2 + idx];
442             xw[3] = x[3 + idx];
443             xw[4] = x[4 + idx];
444             xw[5] = x[5 + idx];
445             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
446             v += 36;
447           }
448           t[i2]     = s[0];
449           t[i2 + 1] = s[1];
450           t[i2 + 2] = s[2];
451           t[i2 + 3] = s[3];
452           t[i2 + 4] = s[4];
453           t[i2 + 5] = s[5];
454           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
455           x[i2]     = xw[0];
456           x[i2 + 1] = xw[1];
457           x[i2 + 2] = xw[2];
458           x[i2 + 3] = xw[3];
459           x[i2 + 4] = xw[4];
460           x[i2 + 5] = xw[5];
461           idiag += 36;
462           i2 += 6;
463         }
464         break;
465       case 7:
466         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
467         t[0] = b[0];
468         t[1] = b[1];
469         t[2] = b[2];
470         t[3] = b[3];
471         t[4] = b[4];
472         t[5] = b[5];
473         t[6] = b[6];
474         i2   = 7;
475         idiag += 49;
476         for (i = 1; i < m; i++) {
477           v    = aa + 49 * ai[i];
478           vi   = aj + ai[i];
479           nz   = diag[i] - ai[i];
480           s[0] = b[i2];
481           s[1] = b[i2 + 1];
482           s[2] = b[i2 + 2];
483           s[3] = b[i2 + 3];
484           s[4] = b[i2 + 4];
485           s[5] = b[i2 + 5];
486           s[6] = b[i2 + 6];
487           while (nz--) {
488             idx   = 7 * (*vi++);
489             xw[0] = x[idx];
490             xw[1] = x[1 + idx];
491             xw[2] = x[2 + idx];
492             xw[3] = x[3 + idx];
493             xw[4] = x[4 + idx];
494             xw[5] = x[5 + idx];
495             xw[6] = x[6 + idx];
496             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
497             v += 49;
498           }
499           t[i2]     = s[0];
500           t[i2 + 1] = s[1];
501           t[i2 + 2] = s[2];
502           t[i2 + 3] = s[3];
503           t[i2 + 4] = s[4];
504           t[i2 + 5] = s[5];
505           t[i2 + 6] = s[6];
506           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
507           x[i2]     = xw[0];
508           x[i2 + 1] = xw[1];
509           x[i2 + 2] = xw[2];
510           x[i2 + 3] = xw[3];
511           x[i2 + 4] = xw[4];
512           x[i2 + 5] = xw[5];
513           x[i2 + 6] = xw[6];
514           idiag += 49;
515           i2 += 7;
516         }
517         break;
518       default:
519         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
520         PetscCall(PetscArraycpy(t, b, bs));
521         i2 = bs;
522         idiag += bs2;
523         for (i = 1; i < m; i++) {
524           v  = aa + bs2 * ai[i];
525           vi = aj + ai[i];
526           nz = diag[i] - ai[i];
527 
528           PetscCall(PetscArraycpy(w, b + i2, bs));
529           /* copy all rows of x that are needed into contiguous space */
530           workt = work;
531           for (j = 0; j < nz; j++) {
532             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
533             workt += bs;
534           }
535           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
536           PetscCall(PetscArraycpy(t + i2, w, bs));
537           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
538 
539           idiag += bs2;
540           i2 += bs;
541         }
542         break;
543       }
544       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
545       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
546       xb = t;
547     } else xb = b;
548     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
549       idiag = a->idiag + bs2 * (a->mbs - 1);
550       i2    = bs * (m - 1);
551       switch (bs) {
552       case 1:
553         s[0] = xb[i2];
554         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
555         x[i2] = xw[0];
556         i2 -= 1;
557         for (i = m - 2; i >= 0; i--) {
558           v    = aa + (diag[i] + 1);
559           vi   = aj + diag[i] + 1;
560           nz   = ai[i + 1] - diag[i] - 1;
561           s[0] = xb[i2];
562           for (j = 0; j < nz; j++) {
563             xw[0] = x[vi[j]];
564             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
565           }
566           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
567           x[i2] = xw[0];
568           idiag -= 1;
569           i2 -= 1;
570         }
571         break;
572       case 2:
573         s[0] = xb[i2];
574         s[1] = xb[i2 + 1];
575         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
576         x[i2]     = xw[0];
577         x[i2 + 1] = xw[1];
578         i2 -= 2;
579         idiag -= 4;
580         for (i = m - 2; i >= 0; i--) {
581           v    = aa + 4 * (diag[i] + 1);
582           vi   = aj + diag[i] + 1;
583           nz   = ai[i + 1] - diag[i] - 1;
584           s[0] = xb[i2];
585           s[1] = xb[i2 + 1];
586           for (j = 0; j < nz; j++) {
587             idx   = 2 * vi[j];
588             it    = 4 * j;
589             xw[0] = x[idx];
590             xw[1] = x[1 + idx];
591             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
592           }
593           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
594           x[i2]     = xw[0];
595           x[i2 + 1] = xw[1];
596           idiag -= 4;
597           i2 -= 2;
598         }
599         break;
600       case 3:
601         s[0] = xb[i2];
602         s[1] = xb[i2 + 1];
603         s[2] = xb[i2 + 2];
604         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
605         x[i2]     = xw[0];
606         x[i2 + 1] = xw[1];
607         x[i2 + 2] = xw[2];
608         i2 -= 3;
609         idiag -= 9;
610         for (i = m - 2; i >= 0; i--) {
611           v    = aa + 9 * (diag[i] + 1);
612           vi   = aj + diag[i] + 1;
613           nz   = ai[i + 1] - diag[i] - 1;
614           s[0] = xb[i2];
615           s[1] = xb[i2 + 1];
616           s[2] = xb[i2 + 2];
617           while (nz--) {
618             idx   = 3 * (*vi++);
619             xw[0] = x[idx];
620             xw[1] = x[1 + idx];
621             xw[2] = x[2 + idx];
622             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
623             v += 9;
624           }
625           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
626           x[i2]     = xw[0];
627           x[i2 + 1] = xw[1];
628           x[i2 + 2] = xw[2];
629           idiag -= 9;
630           i2 -= 3;
631         }
632         break;
633       case 4:
634         s[0] = xb[i2];
635         s[1] = xb[i2 + 1];
636         s[2] = xb[i2 + 2];
637         s[3] = xb[i2 + 3];
638         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
639         x[i2]     = xw[0];
640         x[i2 + 1] = xw[1];
641         x[i2 + 2] = xw[2];
642         x[i2 + 3] = xw[3];
643         i2 -= 4;
644         idiag -= 16;
645         for (i = m - 2; i >= 0; i--) {
646           v    = aa + 16 * (diag[i] + 1);
647           vi   = aj + diag[i] + 1;
648           nz   = ai[i + 1] - diag[i] - 1;
649           s[0] = xb[i2];
650           s[1] = xb[i2 + 1];
651           s[2] = xb[i2 + 2];
652           s[3] = xb[i2 + 3];
653           while (nz--) {
654             idx   = 4 * (*vi++);
655             xw[0] = x[idx];
656             xw[1] = x[1 + idx];
657             xw[2] = x[2 + idx];
658             xw[3] = x[3 + idx];
659             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
660             v += 16;
661           }
662           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
663           x[i2]     = xw[0];
664           x[i2 + 1] = xw[1];
665           x[i2 + 2] = xw[2];
666           x[i2 + 3] = xw[3];
667           idiag -= 16;
668           i2 -= 4;
669         }
670         break;
671       case 5:
672         s[0] = xb[i2];
673         s[1] = xb[i2 + 1];
674         s[2] = xb[i2 + 2];
675         s[3] = xb[i2 + 3];
676         s[4] = xb[i2 + 4];
677         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
678         x[i2]     = xw[0];
679         x[i2 + 1] = xw[1];
680         x[i2 + 2] = xw[2];
681         x[i2 + 3] = xw[3];
682         x[i2 + 4] = xw[4];
683         i2 -= 5;
684         idiag -= 25;
685         for (i = m - 2; i >= 0; i--) {
686           v    = aa + 25 * (diag[i] + 1);
687           vi   = aj + diag[i] + 1;
688           nz   = ai[i + 1] - diag[i] - 1;
689           s[0] = xb[i2];
690           s[1] = xb[i2 + 1];
691           s[2] = xb[i2 + 2];
692           s[3] = xb[i2 + 3];
693           s[4] = xb[i2 + 4];
694           while (nz--) {
695             idx   = 5 * (*vi++);
696             xw[0] = x[idx];
697             xw[1] = x[1 + idx];
698             xw[2] = x[2 + idx];
699             xw[3] = x[3 + idx];
700             xw[4] = x[4 + idx];
701             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
702             v += 25;
703           }
704           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
705           x[i2]     = xw[0];
706           x[i2 + 1] = xw[1];
707           x[i2 + 2] = xw[2];
708           x[i2 + 3] = xw[3];
709           x[i2 + 4] = xw[4];
710           idiag -= 25;
711           i2 -= 5;
712         }
713         break;
714       case 6:
715         s[0] = xb[i2];
716         s[1] = xb[i2 + 1];
717         s[2] = xb[i2 + 2];
718         s[3] = xb[i2 + 3];
719         s[4] = xb[i2 + 4];
720         s[5] = xb[i2 + 5];
721         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
722         x[i2]     = xw[0];
723         x[i2 + 1] = xw[1];
724         x[i2 + 2] = xw[2];
725         x[i2 + 3] = xw[3];
726         x[i2 + 4] = xw[4];
727         x[i2 + 5] = xw[5];
728         i2 -= 6;
729         idiag -= 36;
730         for (i = m - 2; i >= 0; i--) {
731           v    = aa + 36 * (diag[i] + 1);
732           vi   = aj + diag[i] + 1;
733           nz   = ai[i + 1] - diag[i] - 1;
734           s[0] = xb[i2];
735           s[1] = xb[i2 + 1];
736           s[2] = xb[i2 + 2];
737           s[3] = xb[i2 + 3];
738           s[4] = xb[i2 + 4];
739           s[5] = xb[i2 + 5];
740           while (nz--) {
741             idx   = 6 * (*vi++);
742             xw[0] = x[idx];
743             xw[1] = x[1 + idx];
744             xw[2] = x[2 + idx];
745             xw[3] = x[3 + idx];
746             xw[4] = x[4 + idx];
747             xw[5] = x[5 + idx];
748             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
749             v += 36;
750           }
751           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
752           x[i2]     = xw[0];
753           x[i2 + 1] = xw[1];
754           x[i2 + 2] = xw[2];
755           x[i2 + 3] = xw[3];
756           x[i2 + 4] = xw[4];
757           x[i2 + 5] = xw[5];
758           idiag -= 36;
759           i2 -= 6;
760         }
761         break;
762       case 7:
763         s[0] = xb[i2];
764         s[1] = xb[i2 + 1];
765         s[2] = xb[i2 + 2];
766         s[3] = xb[i2 + 3];
767         s[4] = xb[i2 + 4];
768         s[5] = xb[i2 + 5];
769         s[6] = xb[i2 + 6];
770         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
771         x[i2]     = xw[0];
772         x[i2 + 1] = xw[1];
773         x[i2 + 2] = xw[2];
774         x[i2 + 3] = xw[3];
775         x[i2 + 4] = xw[4];
776         x[i2 + 5] = xw[5];
777         x[i2 + 6] = xw[6];
778         i2 -= 7;
779         idiag -= 49;
780         for (i = m - 2; i >= 0; i--) {
781           v    = aa + 49 * (diag[i] + 1);
782           vi   = aj + diag[i] + 1;
783           nz   = ai[i + 1] - diag[i] - 1;
784           s[0] = xb[i2];
785           s[1] = xb[i2 + 1];
786           s[2] = xb[i2 + 2];
787           s[3] = xb[i2 + 3];
788           s[4] = xb[i2 + 4];
789           s[5] = xb[i2 + 5];
790           s[6] = xb[i2 + 6];
791           while (nz--) {
792             idx   = 7 * (*vi++);
793             xw[0] = x[idx];
794             xw[1] = x[1 + idx];
795             xw[2] = x[2 + idx];
796             xw[3] = x[3 + idx];
797             xw[4] = x[4 + idx];
798             xw[5] = x[5 + idx];
799             xw[6] = x[6 + idx];
800             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
801             v += 49;
802           }
803           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
804           x[i2]     = xw[0];
805           x[i2 + 1] = xw[1];
806           x[i2 + 2] = xw[2];
807           x[i2 + 3] = xw[3];
808           x[i2 + 4] = xw[4];
809           x[i2 + 5] = xw[5];
810           x[i2 + 6] = xw[6];
811           idiag -= 49;
812           i2 -= 7;
813         }
814         break;
815       default:
816         PetscCall(PetscArraycpy(w, xb + i2, bs));
817         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
818         i2 -= bs;
819         idiag -= bs2;
820         for (i = m - 2; i >= 0; i--) {
821           v  = aa + bs2 * (diag[i] + 1);
822           vi = aj + diag[i] + 1;
823           nz = ai[i + 1] - diag[i] - 1;
824 
825           PetscCall(PetscArraycpy(w, xb + i2, bs));
826           /* copy all rows of x that are needed into contiguous space */
827           workt = work;
828           for (j = 0; j < nz; j++) {
829             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
830             workt += bs;
831           }
832           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
833           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
834 
835           idiag -= bs2;
836           i2 -= bs;
837         }
838         break;
839       }
840       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
841     }
842     its--;
843   }
844   while (its--) {
845     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
846       idiag = a->idiag;
847       i2    = 0;
848       switch (bs) {
849       case 1:
850         for (i = 0; i < m; i++) {
851           v    = aa + ai[i];
852           vi   = aj + ai[i];
853           nz   = ai[i + 1] - ai[i];
854           s[0] = b[i2];
855           for (j = 0; j < nz; j++) {
856             xw[0] = x[vi[j]];
857             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
858           }
859           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
860           x[i2] += xw[0];
861           idiag += 1;
862           i2 += 1;
863         }
864         break;
865       case 2:
866         for (i = 0; i < m; i++) {
867           v    = aa + 4 * ai[i];
868           vi   = aj + ai[i];
869           nz   = ai[i + 1] - ai[i];
870           s[0] = b[i2];
871           s[1] = b[i2 + 1];
872           for (j = 0; j < nz; j++) {
873             idx   = 2 * vi[j];
874             it    = 4 * j;
875             xw[0] = x[idx];
876             xw[1] = x[1 + idx];
877             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
878           }
879           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
880           x[i2] += xw[0];
881           x[i2 + 1] += xw[1];
882           idiag += 4;
883           i2 += 2;
884         }
885         break;
886       case 3:
887         for (i = 0; i < m; i++) {
888           v    = aa + 9 * ai[i];
889           vi   = aj + ai[i];
890           nz   = ai[i + 1] - ai[i];
891           s[0] = b[i2];
892           s[1] = b[i2 + 1];
893           s[2] = b[i2 + 2];
894           while (nz--) {
895             idx   = 3 * (*vi++);
896             xw[0] = x[idx];
897             xw[1] = x[1 + idx];
898             xw[2] = x[2 + idx];
899             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
900             v += 9;
901           }
902           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
903           x[i2] += xw[0];
904           x[i2 + 1] += xw[1];
905           x[i2 + 2] += xw[2];
906           idiag += 9;
907           i2 += 3;
908         }
909         break;
910       case 4:
911         for (i = 0; i < m; i++) {
912           v    = aa + 16 * ai[i];
913           vi   = aj + ai[i];
914           nz   = ai[i + 1] - ai[i];
915           s[0] = b[i2];
916           s[1] = b[i2 + 1];
917           s[2] = b[i2 + 2];
918           s[3] = b[i2 + 3];
919           while (nz--) {
920             idx   = 4 * (*vi++);
921             xw[0] = x[idx];
922             xw[1] = x[1 + idx];
923             xw[2] = x[2 + idx];
924             xw[3] = x[3 + idx];
925             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
926             v += 16;
927           }
928           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
929           x[i2] += xw[0];
930           x[i2 + 1] += xw[1];
931           x[i2 + 2] += xw[2];
932           x[i2 + 3] += xw[3];
933           idiag += 16;
934           i2 += 4;
935         }
936         break;
937       case 5:
938         for (i = 0; i < m; i++) {
939           v    = aa + 25 * ai[i];
940           vi   = aj + ai[i];
941           nz   = ai[i + 1] - ai[i];
942           s[0] = b[i2];
943           s[1] = b[i2 + 1];
944           s[2] = b[i2 + 2];
945           s[3] = b[i2 + 3];
946           s[4] = b[i2 + 4];
947           while (nz--) {
948             idx   = 5 * (*vi++);
949             xw[0] = x[idx];
950             xw[1] = x[1 + idx];
951             xw[2] = x[2 + idx];
952             xw[3] = x[3 + idx];
953             xw[4] = x[4 + idx];
954             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
955             v += 25;
956           }
957           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
958           x[i2] += xw[0];
959           x[i2 + 1] += xw[1];
960           x[i2 + 2] += xw[2];
961           x[i2 + 3] += xw[3];
962           x[i2 + 4] += xw[4];
963           idiag += 25;
964           i2 += 5;
965         }
966         break;
967       case 6:
968         for (i = 0; i < m; i++) {
969           v    = aa + 36 * ai[i];
970           vi   = aj + ai[i];
971           nz   = ai[i + 1] - ai[i];
972           s[0] = b[i2];
973           s[1] = b[i2 + 1];
974           s[2] = b[i2 + 2];
975           s[3] = b[i2 + 3];
976           s[4] = b[i2 + 4];
977           s[5] = b[i2 + 5];
978           while (nz--) {
979             idx   = 6 * (*vi++);
980             xw[0] = x[idx];
981             xw[1] = x[1 + idx];
982             xw[2] = x[2 + idx];
983             xw[3] = x[3 + idx];
984             xw[4] = x[4 + idx];
985             xw[5] = x[5 + idx];
986             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
987             v += 36;
988           }
989           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
990           x[i2] += xw[0];
991           x[i2 + 1] += xw[1];
992           x[i2 + 2] += xw[2];
993           x[i2 + 3] += xw[3];
994           x[i2 + 4] += xw[4];
995           x[i2 + 5] += xw[5];
996           idiag += 36;
997           i2 += 6;
998         }
999         break;
1000       case 7:
1001         for (i = 0; i < m; i++) {
1002           v    = aa + 49 * ai[i];
1003           vi   = aj + ai[i];
1004           nz   = ai[i + 1] - ai[i];
1005           s[0] = b[i2];
1006           s[1] = b[i2 + 1];
1007           s[2] = b[i2 + 2];
1008           s[3] = b[i2 + 3];
1009           s[4] = b[i2 + 4];
1010           s[5] = b[i2 + 5];
1011           s[6] = b[i2 + 6];
1012           while (nz--) {
1013             idx   = 7 * (*vi++);
1014             xw[0] = x[idx];
1015             xw[1] = x[1 + idx];
1016             xw[2] = x[2 + idx];
1017             xw[3] = x[3 + idx];
1018             xw[4] = x[4 + idx];
1019             xw[5] = x[5 + idx];
1020             xw[6] = x[6 + idx];
1021             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1022             v += 49;
1023           }
1024           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1025           x[i2] += xw[0];
1026           x[i2 + 1] += xw[1];
1027           x[i2 + 2] += xw[2];
1028           x[i2 + 3] += xw[3];
1029           x[i2 + 4] += xw[4];
1030           x[i2 + 5] += xw[5];
1031           x[i2 + 6] += xw[6];
1032           idiag += 49;
1033           i2 += 7;
1034         }
1035         break;
1036       default:
1037         for (i = 0; i < m; i++) {
1038           v  = aa + bs2 * ai[i];
1039           vi = aj + ai[i];
1040           nz = ai[i + 1] - ai[i];
1041 
1042           PetscCall(PetscArraycpy(w, b + i2, bs));
1043           /* copy all rows of x that are needed into contiguous space */
1044           workt = work;
1045           for (j = 0; j < nz; j++) {
1046             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1047             workt += bs;
1048           }
1049           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1050           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1051 
1052           idiag += bs2;
1053           i2 += bs;
1054         }
1055         break;
1056       }
1057       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1058     }
1059     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1060       idiag = a->idiag + bs2 * (a->mbs - 1);
1061       i2    = bs * (m - 1);
1062       switch (bs) {
1063       case 1:
1064         for (i = m - 1; i >= 0; i--) {
1065           v    = aa + ai[i];
1066           vi   = aj + ai[i];
1067           nz   = ai[i + 1] - ai[i];
1068           s[0] = b[i2];
1069           for (j = 0; j < nz; j++) {
1070             xw[0] = x[vi[j]];
1071             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1072           }
1073           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1074           x[i2] += xw[0];
1075           idiag -= 1;
1076           i2 -= 1;
1077         }
1078         break;
1079       case 2:
1080         for (i = m - 1; i >= 0; i--) {
1081           v    = aa + 4 * ai[i];
1082           vi   = aj + ai[i];
1083           nz   = ai[i + 1] - ai[i];
1084           s[0] = b[i2];
1085           s[1] = b[i2 + 1];
1086           for (j = 0; j < nz; j++) {
1087             idx   = 2 * vi[j];
1088             it    = 4 * j;
1089             xw[0] = x[idx];
1090             xw[1] = x[1 + idx];
1091             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1092           }
1093           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
1094           x[i2] += xw[0];
1095           x[i2 + 1] += xw[1];
1096           idiag -= 4;
1097           i2 -= 2;
1098         }
1099         break;
1100       case 3:
1101         for (i = m - 1; i >= 0; i--) {
1102           v    = aa + 9 * ai[i];
1103           vi   = aj + ai[i];
1104           nz   = ai[i + 1] - ai[i];
1105           s[0] = b[i2];
1106           s[1] = b[i2 + 1];
1107           s[2] = b[i2 + 2];
1108           while (nz--) {
1109             idx   = 3 * (*vi++);
1110             xw[0] = x[idx];
1111             xw[1] = x[1 + idx];
1112             xw[2] = x[2 + idx];
1113             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1114             v += 9;
1115           }
1116           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
1117           x[i2] += xw[0];
1118           x[i2 + 1] += xw[1];
1119           x[i2 + 2] += xw[2];
1120           idiag -= 9;
1121           i2 -= 3;
1122         }
1123         break;
1124       case 4:
1125         for (i = m - 1; i >= 0; i--) {
1126           v    = aa + 16 * ai[i];
1127           vi   = aj + ai[i];
1128           nz   = ai[i + 1] - ai[i];
1129           s[0] = b[i2];
1130           s[1] = b[i2 + 1];
1131           s[2] = b[i2 + 2];
1132           s[3] = b[i2 + 3];
1133           while (nz--) {
1134             idx   = 4 * (*vi++);
1135             xw[0] = x[idx];
1136             xw[1] = x[1 + idx];
1137             xw[2] = x[2 + idx];
1138             xw[3] = x[3 + idx];
1139             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1140             v += 16;
1141           }
1142           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
1143           x[i2] += xw[0];
1144           x[i2 + 1] += xw[1];
1145           x[i2 + 2] += xw[2];
1146           x[i2 + 3] += xw[3];
1147           idiag -= 16;
1148           i2 -= 4;
1149         }
1150         break;
1151       case 5:
1152         for (i = m - 1; i >= 0; i--) {
1153           v    = aa + 25 * ai[i];
1154           vi   = aj + ai[i];
1155           nz   = ai[i + 1] - ai[i];
1156           s[0] = b[i2];
1157           s[1] = b[i2 + 1];
1158           s[2] = b[i2 + 2];
1159           s[3] = b[i2 + 3];
1160           s[4] = b[i2 + 4];
1161           while (nz--) {
1162             idx   = 5 * (*vi++);
1163             xw[0] = x[idx];
1164             xw[1] = x[1 + idx];
1165             xw[2] = x[2 + idx];
1166             xw[3] = x[3 + idx];
1167             xw[4] = x[4 + idx];
1168             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1169             v += 25;
1170           }
1171           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
1172           x[i2] += xw[0];
1173           x[i2 + 1] += xw[1];
1174           x[i2 + 2] += xw[2];
1175           x[i2 + 3] += xw[3];
1176           x[i2 + 4] += xw[4];
1177           idiag -= 25;
1178           i2 -= 5;
1179         }
1180         break;
1181       case 6:
1182         for (i = m - 1; i >= 0; i--) {
1183           v    = aa + 36 * ai[i];
1184           vi   = aj + ai[i];
1185           nz   = ai[i + 1] - ai[i];
1186           s[0] = b[i2];
1187           s[1] = b[i2 + 1];
1188           s[2] = b[i2 + 2];
1189           s[3] = b[i2 + 3];
1190           s[4] = b[i2 + 4];
1191           s[5] = b[i2 + 5];
1192           while (nz--) {
1193             idx   = 6 * (*vi++);
1194             xw[0] = x[idx];
1195             xw[1] = x[1 + idx];
1196             xw[2] = x[2 + idx];
1197             xw[3] = x[3 + idx];
1198             xw[4] = x[4 + idx];
1199             xw[5] = x[5 + idx];
1200             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1201             v += 36;
1202           }
1203           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
1204           x[i2] += xw[0];
1205           x[i2 + 1] += xw[1];
1206           x[i2 + 2] += xw[2];
1207           x[i2 + 3] += xw[3];
1208           x[i2 + 4] += xw[4];
1209           x[i2 + 5] += xw[5];
1210           idiag -= 36;
1211           i2 -= 6;
1212         }
1213         break;
1214       case 7:
1215         for (i = m - 1; i >= 0; i--) {
1216           v    = aa + 49 * ai[i];
1217           vi   = aj + ai[i];
1218           nz   = ai[i + 1] - ai[i];
1219           s[0] = b[i2];
1220           s[1] = b[i2 + 1];
1221           s[2] = b[i2 + 2];
1222           s[3] = b[i2 + 3];
1223           s[4] = b[i2 + 4];
1224           s[5] = b[i2 + 5];
1225           s[6] = b[i2 + 6];
1226           while (nz--) {
1227             idx   = 7 * (*vi++);
1228             xw[0] = x[idx];
1229             xw[1] = x[1 + idx];
1230             xw[2] = x[2 + idx];
1231             xw[3] = x[3 + idx];
1232             xw[4] = x[4 + idx];
1233             xw[5] = x[5 + idx];
1234             xw[6] = x[6 + idx];
1235             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1236             v += 49;
1237           }
1238           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1239           x[i2] += xw[0];
1240           x[i2 + 1] += xw[1];
1241           x[i2 + 2] += xw[2];
1242           x[i2 + 3] += xw[3];
1243           x[i2 + 4] += xw[4];
1244           x[i2 + 5] += xw[5];
1245           x[i2 + 6] += xw[6];
1246           idiag -= 49;
1247           i2 -= 7;
1248         }
1249         break;
1250       default:
1251         for (i = m - 1; i >= 0; i--) {
1252           v  = aa + bs2 * ai[i];
1253           vi = aj + ai[i];
1254           nz = ai[i + 1] - ai[i];
1255 
1256           PetscCall(PetscArraycpy(w, b + i2, bs));
1257           /* copy all rows of x that are needed into contiguous space */
1258           workt = work;
1259           for (j = 0; j < nz; j++) {
1260             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1261             workt += bs;
1262           }
1263           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1264           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1265 
1266           idiag -= bs2;
1267           i2 -= bs;
1268         }
1269         break;
1270       }
1271       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1272     }
1273   }
1274   PetscCall(VecRestoreArray(xx, &x));
1275   PetscCall(VecRestoreArrayRead(bb, &b));
1276   PetscFunctionReturn(PETSC_SUCCESS);
1277 }
1278 
1279 /*
1280     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1281 */
1282 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1283   #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1284 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1285   #define matsetvaluesblocked4_ matsetvaluesblocked4
1286 #endif
1287 
1288 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1289 {
1290   Mat                A = *AA;
1291   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1292   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1293   PetscInt          *ai = a->i, *ailen = a->ilen;
1294   PetscInt          *aj = a->j, stepval, lastcol = -1;
1295   const PetscScalar *value = v;
1296   MatScalar         *ap, *aa = a->a, *bap;
1297 
1298   PetscFunctionBegin;
1299   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1300   stepval = (n - 1) * 4;
1301   for (k = 0; k < m; k++) { /* loop over added rows */
1302     row  = im[k];
1303     rp   = aj + ai[row];
1304     ap   = aa + 16 * ai[row];
1305     nrow = ailen[row];
1306     low  = 0;
1307     high = nrow;
1308     for (l = 0; l < n; l++) { /* loop over added columns */
1309       col = in[l];
1310       if (col <= lastcol) low = 0;
1311       else high = nrow;
1312       lastcol = col;
1313       value   = v + k * (stepval + 4 + l) * 4;
1314       while (high - low > 7) {
1315         t = (low + high) / 2;
1316         if (rp[t] > col) high = t;
1317         else low = t;
1318       }
1319       for (i = low; i < high; i++) {
1320         if (rp[i] > col) break;
1321         if (rp[i] == col) {
1322           bap = ap + 16 * i;
1323           for (ii = 0; ii < 4; ii++, value += stepval) {
1324             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1325           }
1326           goto noinsert2;
1327         }
1328       }
1329       N = nrow++ - 1;
1330       high++; /* added new column index thus must search to one higher than before */
1331       /* shift up all the later entries in this row */
1332       for (ii = N; ii >= i; ii--) {
1333         rp[ii + 1] = rp[ii];
1334         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1335       }
1336       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1337       rp[i] = col;
1338       bap   = ap + 16 * i;
1339       for (ii = 0; ii < 4; ii++, value += stepval) {
1340         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1341       }
1342     noinsert2:;
1343       low = i;
1344     }
1345     ailen[row] = nrow;
1346   }
1347   PetscFunctionReturnVoid();
1348 }
1349 
1350 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1351   #define matsetvalues4_ MATSETVALUES4
1352 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1353   #define matsetvalues4_ matsetvalues4
1354 #endif
1355 
1356 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1357 {
1358   Mat          A = *AA;
1359   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1360   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1361   PetscInt    *ai = a->i, *ailen = a->ilen;
1362   PetscInt    *aj = a->j, brow, bcol;
1363   PetscInt     ridx, cidx, lastcol = -1;
1364   MatScalar   *ap, value, *aa      = a->a, *bap;
1365 
1366   PetscFunctionBegin;
1367   for (k = 0; k < m; k++) { /* loop over added rows */
1368     row  = im[k];
1369     brow = row / 4;
1370     rp   = aj + ai[brow];
1371     ap   = aa + 16 * ai[brow];
1372     nrow = ailen[brow];
1373     low  = 0;
1374     high = nrow;
1375     for (l = 0; l < n; l++) { /* loop over added columns */
1376       col   = in[l];
1377       bcol  = col / 4;
1378       ridx  = row % 4;
1379       cidx  = col % 4;
1380       value = v[l + k * n];
1381       if (col <= lastcol) low = 0;
1382       else high = nrow;
1383       lastcol = col;
1384       while (high - low > 7) {
1385         t = (low + high) / 2;
1386         if (rp[t] > bcol) high = t;
1387         else low = t;
1388       }
1389       for (i = low; i < high; i++) {
1390         if (rp[i] > bcol) break;
1391         if (rp[i] == bcol) {
1392           bap = ap + 16 * i + 4 * cidx + ridx;
1393           *bap += value;
1394           goto noinsert1;
1395         }
1396       }
1397       N = nrow++ - 1;
1398       high++; /* added new column thus must search to one higher than before */
1399       /* shift up all the later entries in this row */
1400       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
1401       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
1402       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1403       rp[i]                        = bcol;
1404       ap[16 * i + 4 * cidx + ridx] = value;
1405     noinsert1:;
1406       low = i;
1407     }
1408     ailen[brow] = nrow;
1409   }
1410   PetscFunctionReturnVoid();
1411 }
1412 
1413 /*
1414      Checks for missing diagonals
1415 */
1416 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1417 {
1418   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1419   PetscInt    *diag, *ii = a->i, i;
1420 
1421   PetscFunctionBegin;
1422   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
1423   *missing = PETSC_FALSE;
1424   if (A->rmap->n > 0 && !ii) {
1425     *missing = PETSC_TRUE;
1426     if (d) *d = 0;
1427     PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
1428   } else {
1429     PetscInt n;
1430     n    = PetscMin(a->mbs, a->nbs);
1431     diag = a->diag;
1432     for (i = 0; i < n; i++) {
1433       if (diag[i] >= ii[i + 1]) {
1434         *missing = PETSC_TRUE;
1435         if (d) *d = i;
1436         PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i));
1437         break;
1438       }
1439     }
1440   }
1441   PetscFunctionReturn(PETSC_SUCCESS);
1442 }
1443 
1444 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1445 {
1446   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1447   PetscInt     i, j, m = a->mbs;
1448 
1449   PetscFunctionBegin;
1450   if (!a->diag) {
1451     PetscCall(PetscMalloc1(m, &a->diag));
1452     a->free_diag = PETSC_TRUE;
1453   }
1454   for (i = 0; i < m; i++) {
1455     a->diag[i] = a->i[i + 1];
1456     for (j = a->i[i]; j < a->i[i + 1]; j++) {
1457       if (a->j[j] == i) {
1458         a->diag[i] = j;
1459         break;
1460       }
1461     }
1462   }
1463   PetscFunctionReturn(PETSC_SUCCESS);
1464 }
1465 
1466 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1467 {
1468   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1469   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
1470   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
1471 
1472   PetscFunctionBegin;
1473   *nn = n;
1474   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1475   if (symmetric) {
1476     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1477     nz = tia[n];
1478   } else {
1479     tia = a->i;
1480     tja = a->j;
1481   }
1482 
1483   if (!blockcompressed && bs > 1) {
1484     (*nn) *= bs;
1485     /* malloc & create the natural set of indices */
1486     PetscCall(PetscMalloc1((n + 1) * bs, ia));
1487     if (n) {
1488       (*ia)[0] = oshift;
1489       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
1490     }
1491 
1492     for (i = 1; i < n; i++) {
1493       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1494       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
1495     }
1496     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1497 
1498     if (inja) {
1499       PetscCall(PetscMalloc1(nz * bs * bs, ja));
1500       cnt = 0;
1501       for (i = 0; i < n; i++) {
1502         for (j = 0; j < bs; j++) {
1503           for (k = tia[i]; k < tia[i + 1]; k++) {
1504             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
1505           }
1506         }
1507       }
1508     }
1509 
1510     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1511       PetscCall(PetscFree(tia));
1512       PetscCall(PetscFree(tja));
1513     }
1514   } else if (oshift == 1) {
1515     if (symmetric) {
1516       nz = tia[A->rmap->n / bs];
1517       /*  add 1 to i and j indices */
1518       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1519       *ia = tia;
1520       if (ja) {
1521         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1522         *ja = tja;
1523       }
1524     } else {
1525       nz = a->i[A->rmap->n / bs];
1526       /* malloc space and  add 1 to i and j indices */
1527       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1528       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1529       if (ja) {
1530         PetscCall(PetscMalloc1(nz, ja));
1531         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1532       }
1533     }
1534   } else {
1535     *ia = tia;
1536     if (ja) *ja = tja;
1537   }
1538   PetscFunctionReturn(PETSC_SUCCESS);
1539 }
1540 
1541 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1542 {
1543   PetscFunctionBegin;
1544   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1545   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1546     PetscCall(PetscFree(*ia));
1547     if (ja) PetscCall(PetscFree(*ja));
1548   }
1549   PetscFunctionReturn(PETSC_SUCCESS);
1550 }
1551 
1552 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1553 {
1554   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1555 
1556   PetscFunctionBegin;
1557   if (A->hash_active) {
1558     PetscInt bs;
1559     A->ops[0] = a->cops;
1560     PetscCall(PetscHMapIJVDestroy(&a->ht));
1561     PetscCall(MatGetBlockSize(A, &bs));
1562     if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht));
1563     PetscCall(PetscFree(a->dnz));
1564     PetscCall(PetscFree(a->bdnz));
1565     A->hash_active = PETSC_FALSE;
1566   }
1567   PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
1568   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
1569   PetscCall(ISDestroy(&a->row));
1570   PetscCall(ISDestroy(&a->col));
1571   if (a->free_diag) PetscCall(PetscFree(a->diag));
1572   PetscCall(PetscFree(a->idiag));
1573   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
1574   PetscCall(PetscFree(a->solve_work));
1575   PetscCall(PetscFree(a->mult_work));
1576   PetscCall(PetscFree(a->sor_workt));
1577   PetscCall(PetscFree(a->sor_work));
1578   PetscCall(ISDestroy(&a->icol));
1579   PetscCall(PetscFree(a->saved_values));
1580   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1581 
1582   PetscCall(MatDestroy(&a->sbaijMat));
1583   PetscCall(MatDestroy(&a->parent));
1584   PetscCall(PetscFree(A->data));
1585 
1586   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
1587   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
1588   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
1589   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
1590   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
1591   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
1592   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
1593   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
1594   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
1595   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
1596   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
1597   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
1598 #if defined(PETSC_HAVE_HYPRE)
1599   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
1600 #endif
1601   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
1602   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1603   PetscFunctionReturn(PETSC_SUCCESS);
1604 }
1605 
1606 PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1607 {
1608   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1609 
1610   PetscFunctionBegin;
1611   switch (op) {
1612   case MAT_ROW_ORIENTED:
1613     a->roworiented = flg;
1614     break;
1615   case MAT_KEEP_NONZERO_PATTERN:
1616     a->keepnonzeropattern = flg;
1617     break;
1618   case MAT_NEW_NONZERO_LOCATIONS:
1619     a->nonew = (flg ? 0 : 1);
1620     break;
1621   case MAT_NEW_NONZERO_LOCATION_ERR:
1622     a->nonew = (flg ? -1 : 0);
1623     break;
1624   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1625     a->nonew = (flg ? -2 : 0);
1626     break;
1627   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1628     a->nounused = (flg ? -1 : 0);
1629     break;
1630   case MAT_FORCE_DIAGONAL_ENTRIES:
1631   case MAT_IGNORE_OFF_PROC_ENTRIES:
1632   case MAT_USE_HASH_TABLE:
1633   case MAT_SORTED_FULL:
1634     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1635     break;
1636   case MAT_SPD:
1637   case MAT_SYMMETRIC:
1638   case MAT_STRUCTURALLY_SYMMETRIC:
1639   case MAT_HERMITIAN:
1640   case MAT_SYMMETRY_ETERNAL:
1641   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1642   case MAT_SUBMAT_SINGLEIS:
1643   case MAT_STRUCTURE_ONLY:
1644   case MAT_SPD_ETERNAL:
1645     /* if the diagonal matrix is square it inherits some of the properties above */
1646     break;
1647   default:
1648     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1649   }
1650   PetscFunctionReturn(PETSC_SUCCESS);
1651 }
1652 
1653 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1654 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1655 {
1656   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
1657   MatScalar   *aa_i;
1658   PetscScalar *v_i;
1659 
1660   PetscFunctionBegin;
1661   bs  = A->rmap->bs;
1662   bs2 = bs * bs;
1663   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1664 
1665   bn  = row / bs; /* Block number */
1666   bp  = row % bs; /* Block Position */
1667   M   = ai[bn + 1] - ai[bn];
1668   *nz = bs * M;
1669 
1670   if (v) {
1671     *v = NULL;
1672     if (*nz) {
1673       PetscCall(PetscMalloc1(*nz, v));
1674       for (i = 0; i < M; i++) { /* for each block in the block row */
1675         v_i  = *v + i * bs;
1676         aa_i = aa + bs2 * (ai[bn] + i);
1677         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
1678       }
1679     }
1680   }
1681 
1682   if (idx) {
1683     *idx = NULL;
1684     if (*nz) {
1685       PetscCall(PetscMalloc1(*nz, idx));
1686       for (i = 0; i < M; i++) { /* for each block in the block row */
1687         idx_i = *idx + i * bs;
1688         itmp  = bs * aj[ai[bn] + i];
1689         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
1690       }
1691     }
1692   }
1693   PetscFunctionReturn(PETSC_SUCCESS);
1694 }
1695 
1696 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1697 {
1698   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1699 
1700   PetscFunctionBegin;
1701   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
1702   PetscFunctionReturn(PETSC_SUCCESS);
1703 }
1704 
1705 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1706 {
1707   PetscFunctionBegin;
1708   if (nz) *nz = 0;
1709   if (idx) PetscCall(PetscFree(*idx));
1710   if (v) PetscCall(PetscFree(*v));
1711   PetscFunctionReturn(PETSC_SUCCESS);
1712 }
1713 
1714 PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1715 {
1716   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
1717   Mat          C;
1718   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
1719   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
1720   MatScalar   *ata, *aa = a->a;
1721 
1722   PetscFunctionBegin;
1723   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1724   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1725   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1726     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1727 
1728     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
1729     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
1730     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
1731     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
1732 
1733     at  = (Mat_SeqBAIJ *)C->data;
1734     ati = at->i;
1735     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1736   } else {
1737     C   = *B;
1738     at  = (Mat_SeqBAIJ *)C->data;
1739     ati = at->i;
1740   }
1741 
1742   atj = at->j;
1743   ata = at->a;
1744 
1745   /* Copy ati into atfill so we have locations of the next free space in atj */
1746   PetscCall(PetscArraycpy(atfill, ati, nbs));
1747 
1748   /* Walk through A row-wise and mark nonzero entries of A^T. */
1749   for (i = 0; i < mbs; i++) {
1750     anzj = ai[i + 1] - ai[i];
1751     for (j = 0; j < anzj; j++) {
1752       atj[atfill[*aj]] = i;
1753       for (kr = 0; kr < bs; kr++) {
1754         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
1755       }
1756       atfill[*aj++] += 1;
1757     }
1758   }
1759   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1760   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1761 
1762   /* Clean up temporary space and complete requests. */
1763   PetscCall(PetscFree(atfill));
1764 
1765   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1766     PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1767     *B = C;
1768   } else {
1769     PetscCall(MatHeaderMerge(A, &C));
1770   }
1771   PetscFunctionReturn(PETSC_SUCCESS);
1772 }
1773 
1774 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1775 {
1776   Mat Btrans;
1777 
1778   PetscFunctionBegin;
1779   *f = PETSC_FALSE;
1780   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
1781   PetscCall(MatEqual_SeqBAIJ(B, Btrans, f));
1782   PetscCall(MatDestroy(&Btrans));
1783   PetscFunctionReturn(PETSC_SUCCESS);
1784 }
1785 
1786 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1787 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1788 {
1789   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1790   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1791   PetscInt    *rowlens, *colidxs;
1792   PetscScalar *matvals;
1793 
1794   PetscFunctionBegin;
1795   PetscCall(PetscViewerSetUp(viewer));
1796 
1797   M  = mat->rmap->N;
1798   N  = mat->cmap->N;
1799   m  = mat->rmap->n;
1800   bs = mat->rmap->bs;
1801   nz = bs * bs * A->nz;
1802 
1803   /* write matrix header */
1804   header[0] = MAT_FILE_CLASSID;
1805   header[1] = M;
1806   header[2] = N;
1807   header[3] = nz;
1808   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1809 
1810   /* store row lengths */
1811   PetscCall(PetscMalloc1(m, &rowlens));
1812   for (cnt = 0, i = 0; i < A->mbs; i++)
1813     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
1814   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
1815   PetscCall(PetscFree(rowlens));
1816 
1817   /* store column indices  */
1818   PetscCall(PetscMalloc1(nz, &colidxs));
1819   for (cnt = 0, i = 0; i < A->mbs; i++)
1820     for (k = 0; k < bs; k++)
1821       for (j = A->i[i]; j < A->i[i + 1]; j++)
1822         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
1823   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1824   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
1825   PetscCall(PetscFree(colidxs));
1826 
1827   /* store nonzero values */
1828   PetscCall(PetscMalloc1(nz, &matvals));
1829   for (cnt = 0, i = 0; i < A->mbs; i++)
1830     for (k = 0; k < bs; k++)
1831       for (j = A->i[i]; j < A->i[i + 1]; j++)
1832         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
1833   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1834   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
1835   PetscCall(PetscFree(matvals));
1836 
1837   /* write block size option to the viewer's .info file */
1838   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1839   PetscFunctionReturn(PETSC_SUCCESS);
1840 }
1841 
1842 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1843 {
1844   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1845   PetscInt     i, bs = A->rmap->bs, k;
1846 
1847   PetscFunctionBegin;
1848   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1849   for (i = 0; i < a->mbs; i++) {
1850     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
1851     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
1852     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1853   }
1854   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1855   PetscFunctionReturn(PETSC_SUCCESS);
1856 }
1857 
1858 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1859 {
1860   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1861   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1862   PetscViewerFormat format;
1863 
1864   PetscFunctionBegin;
1865   if (A->structure_only) {
1866     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
1867     PetscFunctionReturn(PETSC_SUCCESS);
1868   }
1869 
1870   PetscCall(PetscViewerGetFormat(viewer, &format));
1871   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1872     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1873   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1874     const char *matname;
1875     Mat         aij;
1876     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
1877     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
1878     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
1879     PetscCall(MatView(aij, viewer));
1880     PetscCall(MatDestroy(&aij));
1881   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1882     PetscFunctionReturn(PETSC_SUCCESS);
1883   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1884     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1885     for (i = 0; i < a->mbs; i++) {
1886       for (j = 0; j < bs; j++) {
1887         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1888         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1889           for (l = 0; l < bs; l++) {
1890 #if defined(PETSC_USE_COMPLEX)
1891             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1892               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1893             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1894               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1895             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1896               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1897             }
1898 #else
1899             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1900 #endif
1901           }
1902         }
1903         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1904       }
1905     }
1906     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1907   } else {
1908     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1909     for (i = 0; i < a->mbs; i++) {
1910       for (j = 0; j < bs; j++) {
1911         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1912         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1913           for (l = 0; l < bs; l++) {
1914 #if defined(PETSC_USE_COMPLEX)
1915             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
1916               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1917             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
1918               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1919             } else {
1920               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1921             }
1922 #else
1923             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1924 #endif
1925           }
1926         }
1927         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1928       }
1929     }
1930     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1931   }
1932   PetscCall(PetscViewerFlush(viewer));
1933   PetscFunctionReturn(PETSC_SUCCESS);
1934 }
1935 
1936 #include <petscdraw.h>
1937 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1938 {
1939   Mat               A = (Mat)Aa;
1940   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1941   PetscInt          row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2;
1942   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1943   MatScalar        *aa;
1944   PetscViewer       viewer;
1945   PetscViewerFormat format;
1946 
1947   PetscFunctionBegin;
1948   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1949   PetscCall(PetscViewerGetFormat(viewer, &format));
1950   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
1951 
1952   /* loop over matrix elements drawing boxes */
1953 
1954   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1955     PetscDrawCollectiveBegin(draw);
1956     /* Blue for negative, Cyan for zero and  Red for positive */
1957     color = PETSC_DRAW_BLUE;
1958     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1959       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1960         y_l = A->rmap->N - row - 1.0;
1961         y_r = y_l + 1.0;
1962         x_l = a->j[j] * bs;
1963         x_r = x_l + 1.0;
1964         aa  = a->a + j * bs2;
1965         for (k = 0; k < bs; k++) {
1966           for (l = 0; l < bs; l++) {
1967             if (PetscRealPart(*aa++) >= 0.) continue;
1968             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1969           }
1970         }
1971       }
1972     }
1973     color = PETSC_DRAW_CYAN;
1974     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1975       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1976         y_l = A->rmap->N - row - 1.0;
1977         y_r = y_l + 1.0;
1978         x_l = a->j[j] * bs;
1979         x_r = x_l + 1.0;
1980         aa  = a->a + j * bs2;
1981         for (k = 0; k < bs; k++) {
1982           for (l = 0; l < bs; l++) {
1983             if (PetscRealPart(*aa++) != 0.) continue;
1984             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1985           }
1986         }
1987       }
1988     }
1989     color = PETSC_DRAW_RED;
1990     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1991       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1992         y_l = A->rmap->N - row - 1.0;
1993         y_r = y_l + 1.0;
1994         x_l = a->j[j] * bs;
1995         x_r = x_l + 1.0;
1996         aa  = a->a + j * bs2;
1997         for (k = 0; k < bs; k++) {
1998           for (l = 0; l < bs; l++) {
1999             if (PetscRealPart(*aa++) <= 0.) continue;
2000             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2001           }
2002         }
2003       }
2004     }
2005     PetscDrawCollectiveEnd(draw);
2006   } else {
2007     /* use contour shading to indicate magnitude of values */
2008     /* first determine max of all nonzero values */
2009     PetscReal minv = 0.0, maxv = 0.0;
2010     PetscDraw popup;
2011 
2012     for (i = 0; i < a->nz * a->bs2; i++) {
2013       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
2014     }
2015     if (minv >= maxv) maxv = minv + PETSC_SMALL;
2016     PetscCall(PetscDrawGetPopup(draw, &popup));
2017     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
2018 
2019     PetscDrawCollectiveBegin(draw);
2020     for (i = 0, row = 0; i < mbs; i++, row += bs) {
2021       for (j = a->i[i]; j < a->i[i + 1]; j++) {
2022         y_l = A->rmap->N - row - 1.0;
2023         y_r = y_l + 1.0;
2024         x_l = a->j[j] * bs;
2025         x_r = x_l + 1.0;
2026         aa  = a->a + j * bs2;
2027         for (k = 0; k < bs; k++) {
2028           for (l = 0; l < bs; l++) {
2029             MatScalar v = *aa++;
2030             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
2031             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2032           }
2033         }
2034       }
2035     }
2036     PetscDrawCollectiveEnd(draw);
2037   }
2038   PetscFunctionReturn(PETSC_SUCCESS);
2039 }
2040 
2041 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2042 {
2043   PetscReal xl, yl, xr, yr, w, h;
2044   PetscDraw draw;
2045   PetscBool isnull;
2046 
2047   PetscFunctionBegin;
2048   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
2049   PetscCall(PetscDrawIsNull(draw, &isnull));
2050   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
2051 
2052   xr = A->cmap->n;
2053   yr = A->rmap->N;
2054   h  = yr / 10.0;
2055   w  = xr / 10.0;
2056   xr += w;
2057   yr += h;
2058   xl = -w;
2059   yl = -h;
2060   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
2061   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
2062   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
2063   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
2064   PetscCall(PetscDrawSave(draw));
2065   PetscFunctionReturn(PETSC_SUCCESS);
2066 }
2067 
2068 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2069 {
2070   PetscBool iascii, isbinary, isdraw;
2071 
2072   PetscFunctionBegin;
2073   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
2074   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2075   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
2076   if (iascii) {
2077     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
2078   } else if (isbinary) {
2079     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
2080   } else if (isdraw) {
2081     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
2082   } else {
2083     Mat B;
2084     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2085     PetscCall(MatView(B, viewer));
2086     PetscCall(MatDestroy(&B));
2087   }
2088   PetscFunctionReturn(PETSC_SUCCESS);
2089 }
2090 
2091 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2092 {
2093   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2094   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2095   PetscInt    *ai = a->i, *ailen = a->ilen;
2096   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
2097   MatScalar   *ap, *aa = a->a;
2098 
2099   PetscFunctionBegin;
2100   for (k = 0; k < m; k++) { /* loop over rows */
2101     row  = im[k];
2102     brow = row / bs;
2103     if (row < 0) {
2104       v += n;
2105       continue;
2106     } /* negative row */
2107     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2108     rp   = aj ? aj + ai[brow] : NULL;       /* mustn't add to NULL, that is UB */
2109     ap   = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */
2110     nrow = ailen[brow];
2111     for (l = 0; l < n; l++) { /* loop over columns */
2112       if (in[l] < 0) {
2113         v++;
2114         continue;
2115       } /* negative column */
2116       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
2117       col  = in[l];
2118       bcol = col / bs;
2119       cidx = col % bs;
2120       ridx = row % bs;
2121       high = nrow;
2122       low  = 0; /* assume unsorted */
2123       while (high - low > 5) {
2124         t = (low + high) / 2;
2125         if (rp[t] > bcol) high = t;
2126         else low = t;
2127       }
2128       for (i = low; i < high; i++) {
2129         if (rp[i] > bcol) break;
2130         if (rp[i] == bcol) {
2131           *v++ = ap[bs2 * i + bs * cidx + ridx];
2132           goto finished;
2133         }
2134       }
2135       *v++ = 0.0;
2136     finished:;
2137     }
2138   }
2139   PetscFunctionReturn(PETSC_SUCCESS);
2140 }
2141 
2142 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2143 {
2144   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2145   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2146   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2147   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2148   PetscBool          roworiented = a->roworiented;
2149   const PetscScalar *value       = v;
2150   MatScalar         *ap = NULL, *aa = a->a, *bap;
2151 
2152   PetscFunctionBegin;
2153   if (roworiented) {
2154     stepval = (n - 1) * bs;
2155   } else {
2156     stepval = (m - 1) * bs;
2157   }
2158   for (k = 0; k < m; k++) { /* loop over added rows */
2159     row = im[k];
2160     if (row < 0) continue;
2161     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
2162     rp = aj + ai[row];
2163     if (!A->structure_only) ap = aa + bs2 * ai[row];
2164     rmax = imax[row];
2165     nrow = ailen[row];
2166     low  = 0;
2167     high = nrow;
2168     for (l = 0; l < n; l++) { /* loop over added columns */
2169       if (in[l] < 0) continue;
2170       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
2171       col = in[l];
2172       if (!A->structure_only) {
2173         if (roworiented) {
2174           value = v + (k * (stepval + bs) + l) * bs;
2175         } else {
2176           value = v + (l * (stepval + bs) + k) * bs;
2177         }
2178       }
2179       if (col <= lastcol) low = 0;
2180       else high = nrow;
2181       lastcol = col;
2182       while (high - low > 7) {
2183         t = (low + high) / 2;
2184         if (rp[t] > col) high = t;
2185         else low = t;
2186       }
2187       for (i = low; i < high; i++) {
2188         if (rp[i] > col) break;
2189         if (rp[i] == col) {
2190           if (A->structure_only) goto noinsert2;
2191           bap = ap + bs2 * i;
2192           if (roworiented) {
2193             if (is == ADD_VALUES) {
2194               for (ii = 0; ii < bs; ii++, value += stepval) {
2195                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2196               }
2197             } else {
2198               for (ii = 0; ii < bs; ii++, value += stepval) {
2199                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2200               }
2201             }
2202           } else {
2203             if (is == ADD_VALUES) {
2204               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2205                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
2206                 bap += bs;
2207               }
2208             } else {
2209               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2210                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
2211                 bap += bs;
2212               }
2213             }
2214           }
2215           goto noinsert2;
2216         }
2217       }
2218       if (nonew == 1) goto noinsert2;
2219       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2220       if (A->structure_only) {
2221         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
2222       } else {
2223         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2224       }
2225       N = nrow++ - 1;
2226       high++;
2227       /* shift up all the later entries in this row */
2228       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2229       rp[i] = col;
2230       if (!A->structure_only) {
2231         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2232         bap = ap + bs2 * i;
2233         if (roworiented) {
2234           for (ii = 0; ii < bs; ii++, value += stepval) {
2235             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2236           }
2237         } else {
2238           for (ii = 0; ii < bs; ii++, value += stepval) {
2239             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2240           }
2241         }
2242       }
2243     noinsert2:;
2244       low = i;
2245     }
2246     ailen[row] = nrow;
2247   }
2248   PetscFunctionReturn(PETSC_SUCCESS);
2249 }
2250 
2251 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2252 {
2253   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2254   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2255   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2256   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2257   MatScalar   *aa    = a->a, *ap;
2258   PetscReal    ratio = 0.6;
2259 
2260   PetscFunctionBegin;
2261   if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS);
2262 
2263   if (m) rmax = ailen[0];
2264   for (i = 1; i < mbs; i++) {
2265     /* move each row back by the amount of empty slots (fshift) before it*/
2266     fshift += imax[i - 1] - ailen[i - 1];
2267     rmax = PetscMax(rmax, ailen[i]);
2268     if (fshift) {
2269       ip = aj + ai[i];
2270       ap = aa + bs2 * ai[i];
2271       N  = ailen[i];
2272       PetscCall(PetscArraymove(ip - fshift, ip, N));
2273       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2274     }
2275     ai[i] = ai[i - 1] + ailen[i - 1];
2276   }
2277   if (mbs) {
2278     fshift += imax[mbs - 1] - ailen[mbs - 1];
2279     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2280   }
2281 
2282   /* reset ilen and imax for each row */
2283   a->nonzerorowcnt = 0;
2284   if (A->structure_only) {
2285     PetscCall(PetscFree2(a->imax, a->ilen));
2286   } else { /* !A->structure_only */
2287     for (i = 0; i < mbs; i++) {
2288       ailen[i] = imax[i] = ai[i + 1] - ai[i];
2289       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2290     }
2291   }
2292   a->nz = ai[mbs];
2293 
2294   /* diagonals may have moved, so kill the diagonal pointers */
2295   a->idiagvalid = PETSC_FALSE;
2296   if (fshift && a->diag) PetscCall(PetscFree(a->diag));
2297   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
2298   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
2299   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
2300   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
2301 
2302   A->info.mallocs += a->reallocs;
2303   a->reallocs         = 0;
2304   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2305   a->rmax             = rmax;
2306 
2307   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
2308   PetscFunctionReturn(PETSC_SUCCESS);
2309 }
2310 
2311 /*
2312    This function returns an array of flags which indicate the locations of contiguous
2313    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2314    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2315    Assume: sizes should be long enough to hold all the values.
2316 */
2317 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2318 {
2319   PetscInt j = 0;
2320 
2321   PetscFunctionBegin;
2322   for (PetscInt i = 0; i < n; j++) {
2323     PetscInt row = idx[i];
2324     if (row % bs != 0) { /* Not the beginning of a block */
2325       sizes[j] = 1;
2326       i++;
2327     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2328       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2329       i++;
2330     } else { /* Beginning of the block, so check if the complete block exists */
2331       PetscBool flg = PETSC_TRUE;
2332       for (PetscInt k = 1; k < bs; k++) {
2333         if (row + k != idx[i + k]) { /* break in the block */
2334           flg = PETSC_FALSE;
2335           break;
2336         }
2337       }
2338       if (flg) { /* No break in the bs */
2339         sizes[j] = bs;
2340         i += bs;
2341       } else {
2342         sizes[j] = 1;
2343         i++;
2344       }
2345     }
2346   }
2347   *bs_max = j;
2348   PetscFunctionReturn(PETSC_SUCCESS);
2349 }
2350 
2351 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2352 {
2353   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2354   PetscInt           i, j, k, count, *rows;
2355   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
2356   PetscScalar        zero = 0.0;
2357   MatScalar         *aa;
2358   const PetscScalar *xx;
2359   PetscScalar       *bb;
2360 
2361   PetscFunctionBegin;
2362   /* fix right hand side if needed */
2363   if (x && b) {
2364     PetscCall(VecGetArrayRead(x, &xx));
2365     PetscCall(VecGetArray(b, &bb));
2366     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
2367     PetscCall(VecRestoreArrayRead(x, &xx));
2368     PetscCall(VecRestoreArray(b, &bb));
2369   }
2370 
2371   /* Make a copy of the IS and  sort it */
2372   /* allocate memory for rows,sizes */
2373   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2374 
2375   /* copy IS values to rows, and sort them */
2376   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
2377   PetscCall(PetscSortInt(is_n, rows));
2378 
2379   if (baij->keepnonzeropattern) {
2380     for (i = 0; i < is_n; i++) sizes[i] = 1;
2381     bs_max = is_n;
2382   } else {
2383     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2384     A->nonzerostate++;
2385   }
2386 
2387   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2388     row = rows[j];
2389     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2390     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2391     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
2392     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2393       if (diag != (PetscScalar)0.0) {
2394         if (baij->ilen[row / bs] > 0) {
2395           baij->ilen[row / bs]       = 1;
2396           baij->j[baij->i[row / bs]] = row / bs;
2397 
2398           PetscCall(PetscArrayzero(aa, count * bs));
2399         }
2400         /* Now insert all the diagonal values for this bs */
2401         for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES));
2402       } else { /* (diag == 0.0) */
2403         baij->ilen[row / bs] = 0;
2404       }      /* end (diag == 0.0) */
2405     } else { /* (sizes[i] != bs) */
2406       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2407       for (k = 0; k < count; k++) {
2408         aa[0] = zero;
2409         aa += bs;
2410       }
2411       if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES));
2412     }
2413   }
2414 
2415   PetscCall(PetscFree2(rows, sizes));
2416   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2417   PetscFunctionReturn(PETSC_SUCCESS);
2418 }
2419 
2420 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2421 {
2422   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2423   PetscInt           i, j, k, count;
2424   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
2425   PetscScalar        zero = 0.0;
2426   MatScalar         *aa;
2427   const PetscScalar *xx;
2428   PetscScalar       *bb;
2429   PetscBool         *zeroed, vecs = PETSC_FALSE;
2430 
2431   PetscFunctionBegin;
2432   /* fix right hand side if needed */
2433   if (x && b) {
2434     PetscCall(VecGetArrayRead(x, &xx));
2435     PetscCall(VecGetArray(b, &bb));
2436     vecs = PETSC_TRUE;
2437   }
2438 
2439   /* zero the columns */
2440   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
2441   for (i = 0; i < is_n; i++) {
2442     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
2443     zeroed[is_idx[i]] = PETSC_TRUE;
2444   }
2445   for (i = 0; i < A->rmap->N; i++) {
2446     if (!zeroed[i]) {
2447       row = i / bs;
2448       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
2449         for (k = 0; k < bs; k++) {
2450           col = bs * baij->j[j] + k;
2451           if (zeroed[col]) {
2452             aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
2453             if (vecs) bb[i] -= aa[0] * xx[col];
2454             aa[0] = 0.0;
2455           }
2456         }
2457       }
2458     } else if (vecs) bb[i] = diag * xx[i];
2459   }
2460   PetscCall(PetscFree(zeroed));
2461   if (vecs) {
2462     PetscCall(VecRestoreArrayRead(x, &xx));
2463     PetscCall(VecRestoreArray(b, &bb));
2464   }
2465 
2466   /* zero the rows */
2467   for (i = 0; i < is_n; i++) {
2468     row   = is_idx[i];
2469     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2470     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
2471     for (k = 0; k < count; k++) {
2472       aa[0] = zero;
2473       aa += bs;
2474     }
2475     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
2476   }
2477   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2478   PetscFunctionReturn(PETSC_SUCCESS);
2479 }
2480 
2481 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2482 {
2483   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2484   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2485   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2486   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2487   PetscInt     ridx, cidx, bs2                 = a->bs2;
2488   PetscBool    roworiented = a->roworiented;
2489   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
2490 
2491   PetscFunctionBegin;
2492   for (k = 0; k < m; k++) { /* loop over added rows */
2493     row  = im[k];
2494     brow = row / bs;
2495     if (row < 0) continue;
2496     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
2497     rp = aj + ai[brow];
2498     if (!A->structure_only) ap = aa + bs2 * ai[brow];
2499     rmax = imax[brow];
2500     nrow = ailen[brow];
2501     low  = 0;
2502     high = nrow;
2503     for (l = 0; l < n; l++) { /* loop over added columns */
2504       if (in[l] < 0) continue;
2505       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
2506       col  = in[l];
2507       bcol = col / bs;
2508       ridx = row % bs;
2509       cidx = col % bs;
2510       if (!A->structure_only) {
2511         if (roworiented) {
2512           value = v[l + k * n];
2513         } else {
2514           value = v[k + l * m];
2515         }
2516       }
2517       if (col <= lastcol) low = 0;
2518       else high = nrow;
2519       lastcol = col;
2520       while (high - low > 7) {
2521         t = (low + high) / 2;
2522         if (rp[t] > bcol) high = t;
2523         else low = t;
2524       }
2525       for (i = low; i < high; i++) {
2526         if (rp[i] > bcol) break;
2527         if (rp[i] == bcol) {
2528           bap = ap + bs2 * i + bs * cidx + ridx;
2529           if (!A->structure_only) {
2530             if (is == ADD_VALUES) *bap += value;
2531             else *bap = value;
2532           }
2533           goto noinsert1;
2534         }
2535       }
2536       if (nonew == 1) goto noinsert1;
2537       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2538       if (A->structure_only) {
2539         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2540       } else {
2541         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2542       }
2543       N = nrow++ - 1;
2544       high++;
2545       /* shift up all the later entries in this row */
2546       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2547       rp[i] = bcol;
2548       if (!A->structure_only) {
2549         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2550         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2551         ap[bs2 * i + bs * cidx + ridx] = value;
2552       }
2553       a->nz++;
2554       A->nonzerostate++;
2555     noinsert1:;
2556       low = i;
2557     }
2558     ailen[brow] = nrow;
2559   }
2560   PetscFunctionReturn(PETSC_SUCCESS);
2561 }
2562 
2563 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2564 {
2565   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
2566   Mat          outA;
2567   PetscBool    row_identity, col_identity;
2568 
2569   PetscFunctionBegin;
2570   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
2571   PetscCall(ISIdentity(row, &row_identity));
2572   PetscCall(ISIdentity(col, &col_identity));
2573   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
2574 
2575   outA            = inA;
2576   inA->factortype = MAT_FACTOR_LU;
2577   PetscCall(PetscFree(inA->solvertype));
2578   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
2579 
2580   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2581 
2582   PetscCall(PetscObjectReference((PetscObject)row));
2583   PetscCall(ISDestroy(&a->row));
2584   a->row = row;
2585   PetscCall(PetscObjectReference((PetscObject)col));
2586   PetscCall(ISDestroy(&a->col));
2587   a->col = col;
2588 
2589   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2590   PetscCall(ISDestroy(&a->icol));
2591   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2592 
2593   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2594   if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
2595   PetscCall(MatLUFactorNumeric(outA, inA, info));
2596   PetscFunctionReturn(PETSC_SUCCESS);
2597 }
2598 
2599 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2600 {
2601   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2602 
2603   PetscFunctionBegin;
2604   baij->nz = baij->maxnz;
2605   PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2606   PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
2607   PetscFunctionReturn(PETSC_SUCCESS);
2608 }
2609 
2610 /*@
2611   MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix.
2612 
2613   Input Parameters:
2614 + mat     - the `MATSEQBAIJ` matrix
2615 - indices - the column indices
2616 
2617   Level: advanced
2618 
2619   Notes:
2620   This can be called if you have precomputed the nonzero structure of the
2621   matrix and want to provide it to the matrix object to improve the performance
2622   of the `MatSetValues()` operation.
2623 
2624   You MUST have set the correct numbers of nonzeros per row in the call to
2625   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
2626 
2627   MUST be called before any calls to `MatSetValues()`
2628 
2629 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()`
2630 @*/
2631 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2632 {
2633   PetscFunctionBegin;
2634   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2635   PetscAssertPointer(indices, 2);
2636   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices));
2637   PetscFunctionReturn(PETSC_SUCCESS);
2638 }
2639 
2640 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2641 {
2642   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2643   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2644   PetscReal    atmp;
2645   PetscScalar *x, zero = 0.0;
2646   MatScalar   *aa;
2647   PetscInt     ncols, brow, krow, kcol;
2648 
2649   PetscFunctionBegin;
2650   /* why is this not a macro???????????????????????????????????????????????????????????????? */
2651   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2652   bs  = A->rmap->bs;
2653   aa  = a->a;
2654   ai  = a->i;
2655   aj  = a->j;
2656   mbs = a->mbs;
2657 
2658   PetscCall(VecSet(v, zero));
2659   PetscCall(VecGetArray(v, &x));
2660   PetscCall(VecGetLocalSize(v, &n));
2661   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2662   for (i = 0; i < mbs; i++) {
2663     ncols = ai[1] - ai[0];
2664     ai++;
2665     brow = bs * i;
2666     for (j = 0; j < ncols; j++) {
2667       for (kcol = 0; kcol < bs; kcol++) {
2668         for (krow = 0; krow < bs; krow++) {
2669           atmp = PetscAbsScalar(*aa);
2670           aa++;
2671           row = brow + krow; /* row index */
2672           if (PetscAbsScalar(x[row]) < atmp) {
2673             x[row] = atmp;
2674             if (idx) idx[row] = bs * (*aj) + kcol;
2675           }
2676         }
2677       }
2678       aj++;
2679     }
2680   }
2681   PetscCall(VecRestoreArray(v, &x));
2682   PetscFunctionReturn(PETSC_SUCCESS);
2683 }
2684 
2685 PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2686 {
2687   PetscFunctionBegin;
2688   /* If the two matrices have the same copy implementation, use fast copy. */
2689   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2690     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
2691     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2692     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
2693 
2694     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
2695     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
2696     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
2697     PetscCall(PetscObjectStateIncrease((PetscObject)B));
2698   } else {
2699     PetscCall(MatCopy_Basic(A, B, str));
2700   }
2701   PetscFunctionReturn(PETSC_SUCCESS);
2702 }
2703 
2704 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2705 {
2706   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2707 
2708   PetscFunctionBegin;
2709   *array = a->a;
2710   PetscFunctionReturn(PETSC_SUCCESS);
2711 }
2712 
2713 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2714 {
2715   PetscFunctionBegin;
2716   *array = NULL;
2717   PetscFunctionReturn(PETSC_SUCCESS);
2718 }
2719 
2720 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2721 {
2722   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
2723   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
2724   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
2725 
2726   PetscFunctionBegin;
2727   /* Set the number of nonzeros in the new matrix */
2728   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
2729   PetscFunctionReturn(PETSC_SUCCESS);
2730 }
2731 
2732 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2733 {
2734   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
2735   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2736   PetscBLASInt one = 1;
2737 
2738   PetscFunctionBegin;
2739   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2740     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2741     if (e) {
2742       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2743       if (e) {
2744         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2745         if (e) str = SAME_NONZERO_PATTERN;
2746       }
2747     }
2748     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2749   }
2750   if (str == SAME_NONZERO_PATTERN) {
2751     PetscScalar  alpha = a;
2752     PetscBLASInt bnz;
2753     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2754     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
2755     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2756   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2757     PetscCall(MatAXPY_Basic(Y, a, X, str));
2758   } else {
2759     Mat       B;
2760     PetscInt *nnz;
2761     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
2762     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
2763     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2764     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2765     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
2766     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
2767     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
2768     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
2769     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
2770     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2771     PetscCall(MatHeaderMerge(Y, &B));
2772     PetscCall(PetscFree(nnz));
2773   }
2774   PetscFunctionReturn(PETSC_SUCCESS);
2775 }
2776 
2777 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2778 {
2779 #if PetscDefined(USE_COMPLEX)
2780   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2781   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2782   MatScalar   *aa = a->a;
2783 
2784   PetscFunctionBegin;
2785   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
2786   PetscFunctionReturn(PETSC_SUCCESS);
2787 #else
2788   (void)A;
2789   return PETSC_SUCCESS;
2790 #endif
2791 }
2792 
2793 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2794 {
2795 #if PetscDefined(USE_COMPLEX)
2796   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2797   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2798   MatScalar   *aa = a->a;
2799 
2800   PetscFunctionBegin;
2801   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
2802   PetscFunctionReturn(PETSC_SUCCESS);
2803 #else
2804   (void)A;
2805   return PETSC_SUCCESS;
2806 #endif
2807 }
2808 
2809 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2810 {
2811 #if PetscDefined(USE_COMPLEX)
2812   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2813   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2814   MatScalar   *aa = a->a;
2815 
2816   PetscFunctionBegin;
2817   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2818   PetscFunctionReturn(PETSC_SUCCESS);
2819 #else
2820   (void)A;
2821   return PETSC_SUCCESS;
2822 #endif
2823 }
2824 
2825 /*
2826     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2827 */
2828 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2829 {
2830   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
2831   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
2832   PetscInt     nz = a->i[m], row, *jj, mr, col;
2833 
2834   PetscFunctionBegin;
2835   *nn = n;
2836   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2837   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
2838   PetscCall(PetscCalloc1(n, &collengths));
2839   PetscCall(PetscMalloc1(n + 1, &cia));
2840   PetscCall(PetscMalloc1(nz, &cja));
2841   jj = a->j;
2842   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2843   cia[0] = oshift;
2844   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2845   PetscCall(PetscArrayzero(collengths, n));
2846   jj = a->j;
2847   for (row = 0; row < m; row++) {
2848     mr = a->i[row + 1] - a->i[row];
2849     for (i = 0; i < mr; i++) {
2850       col = *jj++;
2851 
2852       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2853     }
2854   }
2855   PetscCall(PetscFree(collengths));
2856   *ia = cia;
2857   *ja = cja;
2858   PetscFunctionReturn(PETSC_SUCCESS);
2859 }
2860 
2861 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2862 {
2863   PetscFunctionBegin;
2864   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2865   PetscCall(PetscFree(*ia));
2866   PetscCall(PetscFree(*ja));
2867   PetscFunctionReturn(PETSC_SUCCESS);
2868 }
2869 
2870 /*
2871  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2872  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2873  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2874  */
2875 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2876 {
2877   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2878   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2879   PetscInt     nz = a->i[m], row, *jj, mr, col;
2880   PetscInt    *cspidx;
2881 
2882   PetscFunctionBegin;
2883   *nn = n;
2884   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2885 
2886   PetscCall(PetscCalloc1(n, &collengths));
2887   PetscCall(PetscMalloc1(n + 1, &cia));
2888   PetscCall(PetscMalloc1(nz, &cja));
2889   PetscCall(PetscMalloc1(nz, &cspidx));
2890   jj = a->j;
2891   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2892   cia[0] = oshift;
2893   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2894   PetscCall(PetscArrayzero(collengths, n));
2895   jj = a->j;
2896   for (row = 0; row < m; row++) {
2897     mr = a->i[row + 1] - a->i[row];
2898     for (i = 0; i < mr; i++) {
2899       col                                         = *jj++;
2900       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2901       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2902     }
2903   }
2904   PetscCall(PetscFree(collengths));
2905   *ia    = cia;
2906   *ja    = cja;
2907   *spidx = cspidx;
2908   PetscFunctionReturn(PETSC_SUCCESS);
2909 }
2910 
2911 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2912 {
2913   PetscFunctionBegin;
2914   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
2915   PetscCall(PetscFree(*spidx));
2916   PetscFunctionReturn(PETSC_SUCCESS);
2917 }
2918 
2919 PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2920 {
2921   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
2922 
2923   PetscFunctionBegin;
2924   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
2925   PetscCall(MatShift_Basic(Y, a));
2926   PetscFunctionReturn(PETSC_SUCCESS);
2927 }
2928 
2929 PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep)
2930 {
2931   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2932   PetscInt     fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k;
2933   PetscInt     m = A->rmap->N, *ailen = a->ilen;
2934   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2935   MatScalar   *aa = a->a, *ap;
2936   PetscBool    zero;
2937 
2938   PetscFunctionBegin;
2939   PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix");
2940   if (m) rmax = ailen[0];
2941   for (i = 1; i <= mbs; i++) {
2942     for (k = ai[i - 1]; k < ai[i]; k++) {
2943       zero = PETSC_TRUE;
2944       ap   = aa + bs2 * k;
2945       for (j = 0; j < bs2 && zero; j++) {
2946         if (ap[j] != 0.0) zero = PETSC_FALSE;
2947       }
2948       if (zero && (aj[k] != i - 1 || !keep)) fshift++;
2949       else {
2950         if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1));
2951         aj[k - fshift] = aj[k];
2952         PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2));
2953       }
2954     }
2955     ai[i - 1] -= fshift_prev;
2956     fshift_prev  = fshift;
2957     ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1];
2958     a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0);
2959     rmax = PetscMax(rmax, ailen[i - 1]);
2960   }
2961   if (fshift) {
2962     if (mbs) {
2963       ai[mbs] -= fshift;
2964       a->nz = ai[mbs];
2965     }
2966     PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz));
2967     A->nonzerostate++;
2968     A->info.nz_unneeded += (PetscReal)fshift;
2969     a->rmax = rmax;
2970     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
2971     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
2972   }
2973   PetscFunctionReturn(PETSC_SUCCESS);
2974 }
2975 
2976 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2977                                        MatGetRow_SeqBAIJ,
2978                                        MatRestoreRow_SeqBAIJ,
2979                                        MatMult_SeqBAIJ_N,
2980                                        /* 4*/ MatMultAdd_SeqBAIJ_N,
2981                                        MatMultTranspose_SeqBAIJ,
2982                                        MatMultTransposeAdd_SeqBAIJ,
2983                                        NULL,
2984                                        NULL,
2985                                        NULL,
2986                                        /* 10*/ NULL,
2987                                        MatLUFactor_SeqBAIJ,
2988                                        NULL,
2989                                        NULL,
2990                                        MatTranspose_SeqBAIJ,
2991                                        /* 15*/ MatGetInfo_SeqBAIJ,
2992                                        MatEqual_SeqBAIJ,
2993                                        MatGetDiagonal_SeqBAIJ,
2994                                        MatDiagonalScale_SeqBAIJ,
2995                                        MatNorm_SeqBAIJ,
2996                                        /* 20*/ NULL,
2997                                        MatAssemblyEnd_SeqBAIJ,
2998                                        MatSetOption_SeqBAIJ,
2999                                        MatZeroEntries_SeqBAIJ,
3000                                        /* 24*/ MatZeroRows_SeqBAIJ,
3001                                        NULL,
3002                                        NULL,
3003                                        NULL,
3004                                        NULL,
3005                                        /* 29*/ MatSetUp_Seq_Hash,
3006                                        NULL,
3007                                        NULL,
3008                                        NULL,
3009                                        NULL,
3010                                        /* 34*/ MatDuplicate_SeqBAIJ,
3011                                        NULL,
3012                                        NULL,
3013                                        MatILUFactor_SeqBAIJ,
3014                                        NULL,
3015                                        /* 39*/ MatAXPY_SeqBAIJ,
3016                                        MatCreateSubMatrices_SeqBAIJ,
3017                                        MatIncreaseOverlap_SeqBAIJ,
3018                                        MatGetValues_SeqBAIJ,
3019                                        MatCopy_SeqBAIJ,
3020                                        /* 44*/ NULL,
3021                                        MatScale_SeqBAIJ,
3022                                        MatShift_SeqBAIJ,
3023                                        NULL,
3024                                        MatZeroRowsColumns_SeqBAIJ,
3025                                        /* 49*/ NULL,
3026                                        MatGetRowIJ_SeqBAIJ,
3027                                        MatRestoreRowIJ_SeqBAIJ,
3028                                        MatGetColumnIJ_SeqBAIJ,
3029                                        MatRestoreColumnIJ_SeqBAIJ,
3030                                        /* 54*/ MatFDColoringCreate_SeqXAIJ,
3031                                        NULL,
3032                                        NULL,
3033                                        NULL,
3034                                        MatSetValuesBlocked_SeqBAIJ,
3035                                        /* 59*/ MatCreateSubMatrix_SeqBAIJ,
3036                                        MatDestroy_SeqBAIJ,
3037                                        MatView_SeqBAIJ,
3038                                        NULL,
3039                                        NULL,
3040                                        /* 64*/ NULL,
3041                                        NULL,
3042                                        NULL,
3043                                        NULL,
3044                                        NULL,
3045                                        /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
3046                                        NULL,
3047                                        MatConvert_Basic,
3048                                        NULL,
3049                                        NULL,
3050                                        /* 74*/ NULL,
3051                                        MatFDColoringApply_BAIJ,
3052                                        NULL,
3053                                        NULL,
3054                                        NULL,
3055                                        /* 79*/ NULL,
3056                                        NULL,
3057                                        NULL,
3058                                        NULL,
3059                                        MatLoad_SeqBAIJ,
3060                                        /* 84*/ NULL,
3061                                        NULL,
3062                                        NULL,
3063                                        NULL,
3064                                        NULL,
3065                                        /* 89*/ NULL,
3066                                        NULL,
3067                                        NULL,
3068                                        NULL,
3069                                        NULL,
3070                                        /* 94*/ NULL,
3071                                        NULL,
3072                                        NULL,
3073                                        NULL,
3074                                        NULL,
3075                                        /* 99*/ NULL,
3076                                        NULL,
3077                                        NULL,
3078                                        MatConjugate_SeqBAIJ,
3079                                        NULL,
3080                                        /*104*/ NULL,
3081                                        MatRealPart_SeqBAIJ,
3082                                        MatImaginaryPart_SeqBAIJ,
3083                                        NULL,
3084                                        NULL,
3085                                        /*109*/ NULL,
3086                                        NULL,
3087                                        NULL,
3088                                        NULL,
3089                                        MatMissingDiagonal_SeqBAIJ,
3090                                        /*114*/ NULL,
3091                                        NULL,
3092                                        NULL,
3093                                        NULL,
3094                                        NULL,
3095                                        /*119*/ NULL,
3096                                        NULL,
3097                                        MatMultHermitianTranspose_SeqBAIJ,
3098                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3099                                        NULL,
3100                                        /*124*/ NULL,
3101                                        MatGetColumnReductions_SeqBAIJ,
3102                                        MatInvertBlockDiagonal_SeqBAIJ,
3103                                        NULL,
3104                                        NULL,
3105                                        /*129*/ NULL,
3106                                        NULL,
3107                                        NULL,
3108                                        NULL,
3109                                        NULL,
3110                                        /*134*/ NULL,
3111                                        NULL,
3112                                        NULL,
3113                                        NULL,
3114                                        NULL,
3115                                        /*139*/ MatSetBlockSizes_Default,
3116                                        NULL,
3117                                        NULL,
3118                                        MatFDColoringSetUp_SeqXAIJ,
3119                                        NULL,
3120                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3121                                        MatDestroySubMatrices_SeqBAIJ,
3122                                        NULL,
3123                                        NULL,
3124                                        NULL,
3125                                        NULL,
3126                                        /*150*/ NULL,
3127                                        MatEliminateZeros_SeqBAIJ};
3128 
3129 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3130 {
3131   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3132   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3133 
3134   PetscFunctionBegin;
3135   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3136 
3137   /* allocate space for values if not already there */
3138   if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
3139 
3140   /* copy values over */
3141   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
3142   PetscFunctionReturn(PETSC_SUCCESS);
3143 }
3144 
3145 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3146 {
3147   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3148   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3149 
3150   PetscFunctionBegin;
3151   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3152   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
3153 
3154   /* copy values over */
3155   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
3156   PetscFunctionReturn(PETSC_SUCCESS);
3157 }
3158 
3159 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3160 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3161 
3162 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz)
3163 {
3164   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
3165   PetscInt     i, mbs, nbs, bs2;
3166   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3167 
3168   PetscFunctionBegin;
3169   if (B->hash_active) {
3170     PetscInt bs;
3171     B->ops[0] = b->cops;
3172     PetscCall(PetscHMapIJVDestroy(&b->ht));
3173     PetscCall(MatGetBlockSize(B, &bs));
3174     if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht));
3175     PetscCall(PetscFree(b->dnz));
3176     PetscCall(PetscFree(b->bdnz));
3177     B->hash_active = PETSC_FALSE;
3178   }
3179   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3180   if (nz == MAT_SKIP_ALLOCATION) {
3181     skipallocation = PETSC_TRUE;
3182     nz             = 0;
3183   }
3184 
3185   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
3186   PetscCall(PetscLayoutSetUp(B->rmap));
3187   PetscCall(PetscLayoutSetUp(B->cmap));
3188   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3189 
3190   B->preallocated = PETSC_TRUE;
3191 
3192   mbs = B->rmap->n / bs;
3193   nbs = B->cmap->n / bs;
3194   bs2 = bs * bs;
3195 
3196   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3197 
3198   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3199   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3200   if (nnz) {
3201     for (i = 0; i < mbs; i++) {
3202       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
3203       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3204     }
3205   }
3206 
3207   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
3208   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3209   PetscOptionsEnd();
3210 
3211   if (!flg) {
3212     switch (bs) {
3213     case 1:
3214       B->ops->mult    = MatMult_SeqBAIJ_1;
3215       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3216       break;
3217     case 2:
3218       B->ops->mult    = MatMult_SeqBAIJ_2;
3219       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3220       break;
3221     case 3:
3222       B->ops->mult    = MatMult_SeqBAIJ_3;
3223       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3224       break;
3225     case 4:
3226       B->ops->mult    = MatMult_SeqBAIJ_4;
3227       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3228       break;
3229     case 5:
3230       B->ops->mult    = MatMult_SeqBAIJ_5;
3231       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3232       break;
3233     case 6:
3234       B->ops->mult    = MatMult_SeqBAIJ_6;
3235       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3236       break;
3237     case 7:
3238       B->ops->mult    = MatMult_SeqBAIJ_7;
3239       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3240       break;
3241     case 9: {
3242       PetscInt version = 1;
3243       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3244       switch (version) {
3245 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3246       case 1:
3247         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
3248         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3249         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3250         break;
3251 #endif
3252       default:
3253         B->ops->mult    = MatMult_SeqBAIJ_N;
3254         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3255         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3256         break;
3257       }
3258       break;
3259     }
3260     case 11:
3261       B->ops->mult    = MatMult_SeqBAIJ_11;
3262       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3263       break;
3264     case 12: {
3265       PetscInt version = 1;
3266       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3267       switch (version) {
3268       case 1:
3269         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
3270         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3271         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3272         break;
3273       case 2:
3274         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
3275         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3276         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3277         break;
3278 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3279       case 3:
3280         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
3281         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3282         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3283         break;
3284 #endif
3285       default:
3286         B->ops->mult    = MatMult_SeqBAIJ_N;
3287         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3288         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3289         break;
3290       }
3291       break;
3292     }
3293     case 15: {
3294       PetscInt version = 1;
3295       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3296       switch (version) {
3297       case 1:
3298         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3299         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3300         break;
3301       case 2:
3302         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3303         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3304         break;
3305       case 3:
3306         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3307         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3308         break;
3309       case 4:
3310         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3311         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3312         break;
3313       default:
3314         B->ops->mult = MatMult_SeqBAIJ_N;
3315         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3316         break;
3317       }
3318       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3319       break;
3320     }
3321     default:
3322       B->ops->mult    = MatMult_SeqBAIJ_N;
3323       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3324       PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3325       break;
3326     }
3327   }
3328   B->ops->sor = MatSOR_SeqBAIJ;
3329   b->mbs      = mbs;
3330   b->nbs      = nbs;
3331   if (!skipallocation) {
3332     if (!b->imax) {
3333       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
3334 
3335       b->free_imax_ilen = PETSC_TRUE;
3336     }
3337     /* b->ilen will count nonzeros in each block row so far. */
3338     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3339     if (!nnz) {
3340       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3341       else if (nz < 0) nz = 1;
3342       nz = PetscMin(nz, nbs);
3343       for (i = 0; i < mbs; i++) b->imax[i] = nz;
3344       PetscCall(PetscIntMultError(nz, mbs, &nz));
3345     } else {
3346       PetscInt64 nz64 = 0;
3347       for (i = 0; i < mbs; i++) {
3348         b->imax[i] = nnz[i];
3349         nz64 += nnz[i];
3350       }
3351       PetscCall(PetscIntCast(nz64, &nz));
3352     }
3353 
3354     /* allocate the matrix space */
3355     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3356     if (B->structure_only) {
3357       PetscCall(PetscMalloc1(nz, &b->j));
3358       PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i));
3359     } else {
3360       PetscInt nzbs2 = 0;
3361       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
3362       PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i));
3363       PetscCall(PetscArrayzero(b->a, nz * bs2));
3364     }
3365     PetscCall(PetscArrayzero(b->j, nz));
3366 
3367     if (B->structure_only) {
3368       b->singlemalloc = PETSC_FALSE;
3369       b->free_a       = PETSC_FALSE;
3370     } else {
3371       b->singlemalloc = PETSC_TRUE;
3372       b->free_a       = PETSC_TRUE;
3373     }
3374     b->free_ij = PETSC_TRUE;
3375 
3376     b->i[0] = 0;
3377     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3378 
3379   } else {
3380     b->free_a  = PETSC_FALSE;
3381     b->free_ij = PETSC_FALSE;
3382   }
3383 
3384   b->bs2              = bs2;
3385   b->mbs              = mbs;
3386   b->nz               = 0;
3387   b->maxnz            = nz;
3388   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3389   B->was_assembled    = PETSC_FALSE;
3390   B->assembled        = PETSC_FALSE;
3391   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3392   PetscFunctionReturn(PETSC_SUCCESS);
3393 }
3394 
3395 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3396 {
3397   PetscInt     i, m, nz, nz_max = 0, *nnz;
3398   PetscScalar *values      = NULL;
3399   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3400 
3401   PetscFunctionBegin;
3402   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
3403   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
3404   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
3405   PetscCall(PetscLayoutSetUp(B->rmap));
3406   PetscCall(PetscLayoutSetUp(B->cmap));
3407   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3408   m = B->rmap->n / bs;
3409 
3410   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
3411   PetscCall(PetscMalloc1(m + 1, &nnz));
3412   for (i = 0; i < m; i++) {
3413     nz = ii[i + 1] - ii[i];
3414     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3415     nz_max = PetscMax(nz_max, nz);
3416     nnz[i] = nz;
3417   }
3418   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
3419   PetscCall(PetscFree(nnz));
3420 
3421   values = (PetscScalar *)V;
3422   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3423   for (i = 0; i < m; i++) {
3424     PetscInt        ncols = ii[i + 1] - ii[i];
3425     const PetscInt *icols = jj + ii[i];
3426     if (bs == 1 || !roworiented) {
3427       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
3428       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
3429     } else {
3430       PetscInt j;
3431       for (j = 0; j < ncols; j++) {
3432         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
3433         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
3434       }
3435     }
3436   }
3437   if (!V) PetscCall(PetscFree(values));
3438   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3439   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3440   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3441   PetscFunctionReturn(PETSC_SUCCESS);
3442 }
3443 
3444 /*@C
3445   MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3446 
3447   Not Collective
3448 
3449   Input Parameter:
3450 . A - a `MATSEQBAIJ` matrix
3451 
3452   Output Parameter:
3453 . array - pointer to the data
3454 
3455   Level: intermediate
3456 
3457 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3458 @*/
3459 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array)
3460 {
3461   PetscFunctionBegin;
3462   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
3463   PetscFunctionReturn(PETSC_SUCCESS);
3464 }
3465 
3466 /*@C
3467   MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3468 
3469   Not Collective
3470 
3471   Input Parameters:
3472 + A     - a `MATSEQBAIJ` matrix
3473 - array - pointer to the data
3474 
3475   Level: intermediate
3476 
3477 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3478 @*/
3479 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array)
3480 {
3481   PetscFunctionBegin;
3482   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
3483   PetscFunctionReturn(PETSC_SUCCESS);
3484 }
3485 
3486 /*MC
3487    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3488    block sparse compressed row format.
3489 
3490    Options Database Keys:
3491 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()`
3492 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3493 
3494    Level: beginner
3495 
3496    Notes:
3497     `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
3498     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
3499 
3500    Run with `-info` to see what version of the matrix-vector product is being used
3501 
3502 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()`
3503 M*/
3504 
3505 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3506 
3507 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3508 {
3509   PetscMPIInt  size;
3510   Mat_SeqBAIJ *b;
3511 
3512   PetscFunctionBegin;
3513   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
3514   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3515 
3516   PetscCall(PetscNew(&b));
3517   B->data   = (void *)b;
3518   B->ops[0] = MatOps_Values;
3519 
3520   b->row          = NULL;
3521   b->col          = NULL;
3522   b->icol         = NULL;
3523   b->reallocs     = 0;
3524   b->saved_values = NULL;
3525 
3526   b->roworiented        = PETSC_TRUE;
3527   b->nonew              = 0;
3528   b->diag               = NULL;
3529   B->spptr              = NULL;
3530   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3531   b->keepnonzeropattern = PETSC_FALSE;
3532 
3533   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
3534   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
3535   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
3536   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
3537   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
3538   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
3539   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
3540   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
3541   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3542   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
3543 #if defined(PETSC_HAVE_HYPRE)
3544   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
3545 #endif
3546   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
3547   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
3548   PetscFunctionReturn(PETSC_SUCCESS);
3549 }
3550 
3551 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3552 {
3553   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3554   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3555 
3556   PetscFunctionBegin;
3557   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
3558   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
3559 
3560   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3561     c->imax           = a->imax;
3562     c->ilen           = a->ilen;
3563     c->free_imax_ilen = PETSC_FALSE;
3564   } else {
3565     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3566     for (i = 0; i < mbs; i++) {
3567       c->imax[i] = a->imax[i];
3568       c->ilen[i] = a->ilen[i];
3569     }
3570     c->free_imax_ilen = PETSC_TRUE;
3571   }
3572 
3573   /* allocate the matrix space */
3574   if (mallocmatspace) {
3575     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3576       PetscCall(PetscCalloc1(bs2 * nz, &c->a));
3577 
3578       c->i            = a->i;
3579       c->j            = a->j;
3580       c->singlemalloc = PETSC_FALSE;
3581       c->free_a       = PETSC_TRUE;
3582       c->free_ij      = PETSC_FALSE;
3583       c->parent       = A;
3584       C->preallocated = PETSC_TRUE;
3585       C->assembled    = PETSC_TRUE;
3586 
3587       PetscCall(PetscObjectReference((PetscObject)A));
3588       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3589       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3590     } else {
3591       PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i));
3592 
3593       c->singlemalloc = PETSC_TRUE;
3594       c->free_a       = PETSC_TRUE;
3595       c->free_ij      = PETSC_TRUE;
3596 
3597       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3598       if (mbs > 0) {
3599         PetscCall(PetscArraycpy(c->j, a->j, nz));
3600         if (cpvalues == MAT_COPY_VALUES) {
3601           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
3602         } else {
3603           PetscCall(PetscArrayzero(c->a, bs2 * nz));
3604         }
3605       }
3606       C->preallocated = PETSC_TRUE;
3607       C->assembled    = PETSC_TRUE;
3608     }
3609   }
3610 
3611   c->roworiented = a->roworiented;
3612   c->nonew       = a->nonew;
3613 
3614   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
3615   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
3616 
3617   c->bs2 = a->bs2;
3618   c->mbs = a->mbs;
3619   c->nbs = a->nbs;
3620 
3621   if (a->diag) {
3622     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3623       c->diag      = a->diag;
3624       c->free_diag = PETSC_FALSE;
3625     } else {
3626       PetscCall(PetscMalloc1(mbs + 1, &c->diag));
3627       for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i];
3628       c->free_diag = PETSC_TRUE;
3629     }
3630   } else c->diag = NULL;
3631 
3632   c->nz         = a->nz;
3633   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3634   c->solve_work = NULL;
3635   c->mult_work  = NULL;
3636   c->sor_workt  = NULL;
3637   c->sor_work   = NULL;
3638 
3639   c->compressedrow.use   = a->compressedrow.use;
3640   c->compressedrow.nrows = a->compressedrow.nrows;
3641   if (a->compressedrow.use) {
3642     i = a->compressedrow.nrows;
3643     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
3644     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
3645     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
3646   } else {
3647     c->compressedrow.use    = PETSC_FALSE;
3648     c->compressedrow.i      = NULL;
3649     c->compressedrow.rindex = NULL;
3650   }
3651   c->nonzerorowcnt = a->nonzerorowcnt;
3652   C->nonzerostate  = A->nonzerostate;
3653 
3654   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
3655   PetscFunctionReturn(PETSC_SUCCESS);
3656 }
3657 
3658 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3659 {
3660   PetscFunctionBegin;
3661   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
3662   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
3663   PetscCall(MatSetType(*B, MATSEQBAIJ));
3664   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
3665   PetscFunctionReturn(PETSC_SUCCESS);
3666 }
3667 
3668 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3669 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3670 {
3671   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3672   PetscInt    *rowidxs, *colidxs;
3673   PetscScalar *matvals;
3674 
3675   PetscFunctionBegin;
3676   PetscCall(PetscViewerSetUp(viewer));
3677 
3678   /* read matrix header */
3679   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3680   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3681   M  = header[1];
3682   N  = header[2];
3683   nz = header[3];
3684   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3685   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3686   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3687 
3688   /* set block sizes from the viewer's .info file */
3689   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3690   /* set local and global sizes if not set already */
3691   if (mat->rmap->n < 0) mat->rmap->n = M;
3692   if (mat->cmap->n < 0) mat->cmap->n = N;
3693   if (mat->rmap->N < 0) mat->rmap->N = M;
3694   if (mat->cmap->N < 0) mat->cmap->N = N;
3695   PetscCall(PetscLayoutSetUp(mat->rmap));
3696   PetscCall(PetscLayoutSetUp(mat->cmap));
3697 
3698   /* check if the matrix sizes are correct */
3699   PetscCall(MatGetSize(mat, &rows, &cols));
3700   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3701   PetscCall(MatGetBlockSize(mat, &bs));
3702   PetscCall(MatGetLocalSize(mat, &m, &n));
3703   mbs = m / bs;
3704   nbs = n / bs;
3705 
3706   /* read in row lengths, column indices and nonzero values */
3707   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3708   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
3709   rowidxs[0] = 0;
3710   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3711   sum = rowidxs[m];
3712   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3713 
3714   /* read in column indices and nonzero values */
3715   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
3716   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
3717   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3718 
3719   {               /* preallocate matrix storage */
3720     PetscBT   bt; /* helper bit set to count nonzeros */
3721     PetscInt *nnz;
3722     PetscBool sbaij;
3723 
3724     PetscCall(PetscBTCreate(nbs, &bt));
3725     PetscCall(PetscCalloc1(mbs, &nnz));
3726     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3727     for (i = 0; i < mbs; i++) {
3728       PetscCall(PetscBTMemzero(nbs, bt));
3729       for (k = 0; k < bs; k++) {
3730         PetscInt row = bs * i + k;
3731         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3732           PetscInt col = colidxs[j];
3733           if (!sbaij || col >= row)
3734             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3735         }
3736       }
3737     }
3738     PetscCall(PetscBTDestroy(&bt));
3739     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
3740     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
3741     PetscCall(PetscFree(nnz));
3742   }
3743 
3744   /* store matrix values */
3745   for (i = 0; i < m; i++) {
3746     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
3747     PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
3748   }
3749 
3750   PetscCall(PetscFree(rowidxs));
3751   PetscCall(PetscFree2(colidxs, matvals));
3752   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3753   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3754   PetscFunctionReturn(PETSC_SUCCESS);
3755 }
3756 
3757 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3758 {
3759   PetscBool isbinary;
3760 
3761   PetscFunctionBegin;
3762   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3763   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3764   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
3765   PetscFunctionReturn(PETSC_SUCCESS);
3766 }
3767 
3768 /*@C
3769   MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3770   compressed row) format.  For good matrix assembly performance the
3771   user should preallocate the matrix storage by setting the parameter `nz`
3772   (or the array `nnz`).
3773 
3774   Collective
3775 
3776   Input Parameters:
3777 + comm - MPI communicator, set to `PETSC_COMM_SELF`
3778 . bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3779           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3780 . m    - number of rows
3781 . n    - number of columns
3782 . nz   - number of nonzero blocks  per block row (same for all rows)
3783 - nnz  - array containing the number of nonzero blocks in the various block rows
3784          (possibly different for each block row) or `NULL`
3785 
3786   Output Parameter:
3787 . A - the matrix
3788 
3789   Options Database Keys:
3790 + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3791 - -mat_block_size - size of the blocks to use
3792 
3793   Level: intermediate
3794 
3795   Notes:
3796   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3797   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3798   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
3799 
3800   The number of rows and columns must be divisible by blocksize.
3801 
3802   If the `nnz` parameter is given then the `nz` parameter is ignored
3803 
3804   A nonzero block is any block that as 1 or more nonzeros in it
3805 
3806   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3807   storage.  That is, the stored row and column indices can begin at
3808   either one (as in Fortran) or zero.
3809 
3810   Specify the preallocated storage with either `nz` or `nnz` (not both).
3811   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3812   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3813   matrices.
3814 
3815 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3816 @*/
3817 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3818 {
3819   PetscFunctionBegin;
3820   PetscCall(MatCreate(comm, A));
3821   PetscCall(MatSetSizes(*A, m, n, m, n));
3822   PetscCall(MatSetType(*A, MATSEQBAIJ));
3823   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
3824   PetscFunctionReturn(PETSC_SUCCESS);
3825 }
3826 
3827 /*@C
3828   MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3829   per row in the matrix. For good matrix assembly performance the
3830   user should preallocate the matrix storage by setting the parameter `nz`
3831   (or the array `nnz`).
3832 
3833   Collective
3834 
3835   Input Parameters:
3836 + B   - the matrix
3837 . bs  - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3838           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3839 . nz  - number of block nonzeros per block row (same for all rows)
3840 - nnz - array containing the number of block nonzeros in the various block rows
3841          (possibly different for each block row) or `NULL`
3842 
3843   Options Database Keys:
3844 + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3845 - -mat_block_size - size of the blocks to use
3846 
3847   Level: intermediate
3848 
3849   Notes:
3850   If the `nnz` parameter is given then the `nz` parameter is ignored
3851 
3852   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3853   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3854   You can also run with the option `-info` and look for messages with the string
3855   malloc in them to see if additional memory allocation was needed.
3856 
3857   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3858   storage.  That is, the stored row and column indices can begin at
3859   either one (as in Fortran) or zero.
3860 
3861   Specify the preallocated storage with either nz or nnz (not both).
3862   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3863   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3864 
3865 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3866 @*/
3867 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3868 {
3869   PetscFunctionBegin;
3870   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3871   PetscValidType(B, 1);
3872   PetscValidLogicalCollectiveInt(B, bs, 2);
3873   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
3874   PetscFunctionReturn(PETSC_SUCCESS);
3875 }
3876 
3877 /*@C
3878   MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3879 
3880   Collective
3881 
3882   Input Parameters:
3883 + B  - the matrix
3884 . bs - the blocksize
3885 . i  - the indices into `j` for the start of each local row (starts with zero)
3886 . j  - the column indices for each local row (starts with zero) these must be sorted for each row
3887 - v  - optional values in the matrix
3888 
3889   Level: advanced
3890 
3891   Notes:
3892   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
3893   may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
3894   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3895   `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3896   block column and the second index is over columns within a block.
3897 
3898   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3899 
3900 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3901 @*/
3902 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3903 {
3904   PetscFunctionBegin;
3905   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3906   PetscValidType(B, 1);
3907   PetscValidLogicalCollectiveInt(B, bs, 2);
3908   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
3909   PetscFunctionReturn(PETSC_SUCCESS);
3910 }
3911 
3912 /*@
3913   MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3914 
3915   Collective
3916 
3917   Input Parameters:
3918 + comm - must be an MPI communicator of size 1
3919 . bs   - size of block
3920 . m    - number of rows
3921 . n    - number of columns
3922 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3923 . j    - column indices
3924 - a    - matrix values
3925 
3926   Output Parameter:
3927 . mat - the matrix
3928 
3929   Level: advanced
3930 
3931   Notes:
3932   The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
3933   once the matrix is destroyed
3934 
3935   You cannot set new nonzero locations into this matrix, that will generate an error.
3936 
3937   The `i` and `j` indices are 0 based
3938 
3939   When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3940 
3941   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3942   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3943   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3944   with column-major ordering within blocks.
3945 
3946 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3947 @*/
3948 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3949 {
3950   Mat_SeqBAIJ *baij;
3951 
3952   PetscFunctionBegin;
3953   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
3954   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3955 
3956   PetscCall(MatCreate(comm, mat));
3957   PetscCall(MatSetSizes(*mat, m, n, m, n));
3958   PetscCall(MatSetType(*mat, MATSEQBAIJ));
3959   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3960   baij = (Mat_SeqBAIJ *)(*mat)->data;
3961   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3962 
3963   baij->i = i;
3964   baij->j = j;
3965   baij->a = a;
3966 
3967   baij->singlemalloc   = PETSC_FALSE;
3968   baij->nonew          = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3969   baij->free_a         = PETSC_FALSE;
3970   baij->free_ij        = PETSC_FALSE;
3971   baij->free_imax_ilen = PETSC_TRUE;
3972 
3973   for (PetscInt ii = 0; ii < m; ii++) {
3974     const PetscInt row_len = i[ii + 1] - i[ii];
3975 
3976     baij->ilen[ii] = baij->imax[ii] = row_len;
3977     PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
3978   }
3979   if (PetscDefined(USE_DEBUG)) {
3980     for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
3981       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3982       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3983     }
3984   }
3985 
3986   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3987   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3988   PetscFunctionReturn(PETSC_SUCCESS);
3989 }
3990 
3991 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3992 {
3993   PetscFunctionBegin;
3994   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
3995   PetscFunctionReturn(PETSC_SUCCESS);
3996 }
3997