xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 7cd49bdee00a26e142bff6e7df7c3fb9209fa795)
1 /*
2     Defines the basic matrix operations for the BAIJ (compressed row)
3   matrix storage format.
4 */
5 #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
6 #include <petscblaslapack.h>
7 #include <petsc/private/kernels/blockinvert.h>
8 #include <petsc/private/kernels/blockmatmult.h>
9 
10 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */
11 #define TYPE BAIJ
12 #define TYPE_BS
13 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
14 #undef TYPE_BS
15 #define TYPE_BS _BS
16 #define TYPE_BS_ON
17 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
18 #undef TYPE_BS
19 #include "../src/mat/impls/aij/seq/seqhashmat.h"
20 #undef TYPE
21 #undef TYPE_BS_ON
22 
23 #if defined(PETSC_HAVE_HYPRE)
24 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
25 #endif
26 
27 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
28 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
29 #endif
30 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
31 
32 MatGetDiagonalMarkers(SeqBAIJ, A->rmap->bs)
33 
34 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
35 {
36   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
37   PetscInt     m, n, ib, jb, bs = A->rmap->bs;
38   MatScalar   *a_val = a_aij->a;
39 
40   PetscFunctionBegin;
41   PetscCall(MatGetSize(A, &m, &n));
42   PetscCall(PetscArrayzero(reductions, n));
43   if (type == NORM_2) {
44     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
45       for (jb = 0; jb < bs; jb++) {
46         for (ib = 0; ib < bs; ib++) {
47           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
48           a_val++;
49         }
50       }
51     }
52   } else if (type == NORM_1) {
53     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
54       for (jb = 0; jb < bs; jb++) {
55         for (ib = 0; ib < bs; ib++) {
56           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
57           a_val++;
58         }
59       }
60     }
61   } else if (type == NORM_INFINITY) {
62     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
63       for (jb = 0; jb < bs; jb++) {
64         for (ib = 0; ib < bs; ib++) {
65           PetscInt col    = A->cmap->rstart + a_aij->j[i] * bs + jb;
66           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
67           a_val++;
68         }
69       }
70     }
71   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
72     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
73       for (jb = 0; jb < bs; jb++) {
74         for (ib = 0; ib < bs; ib++) {
75           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
76           a_val++;
77         }
78       }
79     }
80   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
81     for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
82       for (jb = 0; jb < bs; jb++) {
83         for (ib = 0; ib < bs; ib++) {
84           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
85           a_val++;
86         }
87       }
88     }
89   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
90   if (type == NORM_2) {
91     for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
92   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
93     for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
94   }
95   PetscFunctionReturn(PETSC_SUCCESS);
96 }
97 
98 static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
99 {
100   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data;
101   PetscInt        i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
102   MatScalar      *v     = a->a, *odiag, *diag, work[25], *v_work;
103   PetscReal       shift = 0.0;
104   PetscBool       allowzeropivot, zeropivotdetected = PETSC_FALSE;
105   const PetscInt *adiag;
106 
107   PetscFunctionBegin;
108   allowzeropivot = PetscNot(A->erroriffailure);
109 
110   if (a->idiagvalid) {
111     if (values) *values = a->idiag;
112     PetscFunctionReturn(PETSC_SUCCESS);
113   }
114   PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &adiag, NULL));
115   if (!a->idiag) PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag));
116   diag = a->idiag;
117   if (values) *values = a->idiag;
118   /* factor and invert each block */
119   switch (bs) {
120   case 1:
121     for (i = 0; i < mbs; i++) {
122       odiag   = v + 1 * adiag[i];
123       diag[0] = odiag[0];
124 
125       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
126         PetscCheck(allowzeropivot, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
127         A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128         A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
129         A->factorerror_zeropivot_row   = i;
130         PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
131       }
132 
133       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
134       diag += 1;
135     }
136     break;
137   case 2:
138     for (i = 0; i < mbs; i++) {
139       odiag   = v + 4 * adiag[i];
140       diag[0] = odiag[0];
141       diag[1] = odiag[1];
142       diag[2] = odiag[2];
143       diag[3] = odiag[3];
144       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
145       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
146       diag += 4;
147     }
148     break;
149   case 3:
150     for (i = 0; i < mbs; i++) {
151       odiag   = v + 9 * adiag[i];
152       diag[0] = odiag[0];
153       diag[1] = odiag[1];
154       diag[2] = odiag[2];
155       diag[3] = odiag[3];
156       diag[4] = odiag[4];
157       diag[5] = odiag[5];
158       diag[6] = odiag[6];
159       diag[7] = odiag[7];
160       diag[8] = odiag[8];
161       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
162       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
163       diag += 9;
164     }
165     break;
166   case 4:
167     for (i = 0; i < mbs; i++) {
168       odiag = v + 16 * adiag[i];
169       PetscCall(PetscArraycpy(diag, odiag, 16));
170       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
171       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
172       diag += 16;
173     }
174     break;
175   case 5:
176     for (i = 0; i < mbs; i++) {
177       odiag = v + 25 * adiag[i];
178       PetscCall(PetscArraycpy(diag, odiag, 25));
179       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
180       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
181       diag += 25;
182     }
183     break;
184   case 6:
185     for (i = 0; i < mbs; i++) {
186       odiag = v + 36 * adiag[i];
187       PetscCall(PetscArraycpy(diag, odiag, 36));
188       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
189       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
190       diag += 36;
191     }
192     break;
193   case 7:
194     for (i = 0; i < mbs; i++) {
195       odiag = v + 49 * adiag[i];
196       PetscCall(PetscArraycpy(diag, odiag, 49));
197       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
198       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
199       diag += 49;
200     }
201     break;
202   default:
203     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
204     for (i = 0; i < mbs; i++) {
205       odiag = v + bs2 * adiag[i];
206       PetscCall(PetscArraycpy(diag, odiag, bs2));
207       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
208       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
209       diag += bs2;
210     }
211     PetscCall(PetscFree2(v_work, v_pivots));
212   }
213   a->idiagvalid = PETSC_TRUE;
214   PetscFunctionReturn(PETSC_SUCCESS);
215 }
216 
217 static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
218 {
219   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
220   PetscScalar       *x, *work, *w, *workt, *t;
221   const MatScalar   *v, *aa = a->a, *idiag;
222   const PetscScalar *b, *xb;
223   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
224   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
225   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
226 
227   PetscFunctionBegin;
228   its = its * lits;
229   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
230   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
231   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
232   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
233   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
234 
235   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
236 
237   if (!m) PetscFunctionReturn(PETSC_SUCCESS);
238   diag  = a->diag;
239   idiag = a->idiag;
240   k     = PetscMax(A->rmap->n, A->cmap->n);
241   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
242   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
243   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
244   work = a->mult_work;
245   t    = a->sor_workt;
246   w    = a->sor_work;
247 
248   PetscCall(VecGetArray(xx, &x));
249   PetscCall(VecGetArrayRead(bb, &b));
250 
251   if (flag & SOR_ZERO_INITIAL_GUESS) {
252     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
253       switch (bs) {
254       case 1:
255         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
256         t[0] = b[0];
257         i2   = 1;
258         idiag += 1;
259         for (i = 1; i < m; i++) {
260           v    = aa + ai[i];
261           vi   = aj + ai[i];
262           nz   = diag[i] - ai[i];
263           s[0] = b[i2];
264           for (j = 0; j < nz; j++) {
265             xw[0] = x[vi[j]];
266             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
267           }
268           t[i2] = s[0];
269           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
270           x[i2] = xw[0];
271           idiag += 1;
272           i2 += 1;
273         }
274         break;
275       case 2:
276         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
277         t[0] = b[0];
278         t[1] = b[1];
279         i2   = 2;
280         idiag += 4;
281         for (i = 1; i < m; i++) {
282           v    = aa + 4 * ai[i];
283           vi   = aj + ai[i];
284           nz   = diag[i] - ai[i];
285           s[0] = b[i2];
286           s[1] = b[i2 + 1];
287           for (j = 0; j < nz; j++) {
288             idx   = 2 * vi[j];
289             it    = 4 * j;
290             xw[0] = x[idx];
291             xw[1] = x[1 + idx];
292             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
293           }
294           t[i2]     = s[0];
295           t[i2 + 1] = s[1];
296           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
297           x[i2]     = xw[0];
298           x[i2 + 1] = xw[1];
299           idiag += 4;
300           i2 += 2;
301         }
302         break;
303       case 3:
304         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
305         t[0] = b[0];
306         t[1] = b[1];
307         t[2] = b[2];
308         i2   = 3;
309         idiag += 9;
310         for (i = 1; i < m; i++) {
311           v    = aa + 9 * ai[i];
312           vi   = aj + ai[i];
313           nz   = diag[i] - ai[i];
314           s[0] = b[i2];
315           s[1] = b[i2 + 1];
316           s[2] = b[i2 + 2];
317           while (nz--) {
318             idx   = 3 * (*vi++);
319             xw[0] = x[idx];
320             xw[1] = x[1 + idx];
321             xw[2] = x[2 + idx];
322             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
323             v += 9;
324           }
325           t[i2]     = s[0];
326           t[i2 + 1] = s[1];
327           t[i2 + 2] = s[2];
328           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
329           x[i2]     = xw[0];
330           x[i2 + 1] = xw[1];
331           x[i2 + 2] = xw[2];
332           idiag += 9;
333           i2 += 3;
334         }
335         break;
336       case 4:
337         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
338         t[0] = b[0];
339         t[1] = b[1];
340         t[2] = b[2];
341         t[3] = b[3];
342         i2   = 4;
343         idiag += 16;
344         for (i = 1; i < m; i++) {
345           v    = aa + 16 * ai[i];
346           vi   = aj + ai[i];
347           nz   = diag[i] - ai[i];
348           s[0] = b[i2];
349           s[1] = b[i2 + 1];
350           s[2] = b[i2 + 2];
351           s[3] = b[i2 + 3];
352           while (nz--) {
353             idx   = 4 * (*vi++);
354             xw[0] = x[idx];
355             xw[1] = x[1 + idx];
356             xw[2] = x[2 + idx];
357             xw[3] = x[3 + idx];
358             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
359             v += 16;
360           }
361           t[i2]     = s[0];
362           t[i2 + 1] = s[1];
363           t[i2 + 2] = s[2];
364           t[i2 + 3] = s[3];
365           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
366           x[i2]     = xw[0];
367           x[i2 + 1] = xw[1];
368           x[i2 + 2] = xw[2];
369           x[i2 + 3] = xw[3];
370           idiag += 16;
371           i2 += 4;
372         }
373         break;
374       case 5:
375         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
376         t[0] = b[0];
377         t[1] = b[1];
378         t[2] = b[2];
379         t[3] = b[3];
380         t[4] = b[4];
381         i2   = 5;
382         idiag += 25;
383         for (i = 1; i < m; i++) {
384           v    = aa + 25 * ai[i];
385           vi   = aj + ai[i];
386           nz   = diag[i] - ai[i];
387           s[0] = b[i2];
388           s[1] = b[i2 + 1];
389           s[2] = b[i2 + 2];
390           s[3] = b[i2 + 3];
391           s[4] = b[i2 + 4];
392           while (nz--) {
393             idx   = 5 * (*vi++);
394             xw[0] = x[idx];
395             xw[1] = x[1 + idx];
396             xw[2] = x[2 + idx];
397             xw[3] = x[3 + idx];
398             xw[4] = x[4 + idx];
399             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
400             v += 25;
401           }
402           t[i2]     = s[0];
403           t[i2 + 1] = s[1];
404           t[i2 + 2] = s[2];
405           t[i2 + 3] = s[3];
406           t[i2 + 4] = s[4];
407           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
408           x[i2]     = xw[0];
409           x[i2 + 1] = xw[1];
410           x[i2 + 2] = xw[2];
411           x[i2 + 3] = xw[3];
412           x[i2 + 4] = xw[4];
413           idiag += 25;
414           i2 += 5;
415         }
416         break;
417       case 6:
418         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
419         t[0] = b[0];
420         t[1] = b[1];
421         t[2] = b[2];
422         t[3] = b[3];
423         t[4] = b[4];
424         t[5] = b[5];
425         i2   = 6;
426         idiag += 36;
427         for (i = 1; i < m; i++) {
428           v    = aa + 36 * ai[i];
429           vi   = aj + ai[i];
430           nz   = diag[i] - ai[i];
431           s[0] = b[i2];
432           s[1] = b[i2 + 1];
433           s[2] = b[i2 + 2];
434           s[3] = b[i2 + 3];
435           s[4] = b[i2 + 4];
436           s[5] = b[i2 + 5];
437           while (nz--) {
438             idx   = 6 * (*vi++);
439             xw[0] = x[idx];
440             xw[1] = x[1 + idx];
441             xw[2] = x[2 + idx];
442             xw[3] = x[3 + idx];
443             xw[4] = x[4 + idx];
444             xw[5] = x[5 + idx];
445             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
446             v += 36;
447           }
448           t[i2]     = s[0];
449           t[i2 + 1] = s[1];
450           t[i2 + 2] = s[2];
451           t[i2 + 3] = s[3];
452           t[i2 + 4] = s[4];
453           t[i2 + 5] = s[5];
454           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
455           x[i2]     = xw[0];
456           x[i2 + 1] = xw[1];
457           x[i2 + 2] = xw[2];
458           x[i2 + 3] = xw[3];
459           x[i2 + 4] = xw[4];
460           x[i2 + 5] = xw[5];
461           idiag += 36;
462           i2 += 6;
463         }
464         break;
465       case 7:
466         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
467         t[0] = b[0];
468         t[1] = b[1];
469         t[2] = b[2];
470         t[3] = b[3];
471         t[4] = b[4];
472         t[5] = b[5];
473         t[6] = b[6];
474         i2   = 7;
475         idiag += 49;
476         for (i = 1; i < m; i++) {
477           v    = aa + 49 * ai[i];
478           vi   = aj + ai[i];
479           nz   = diag[i] - ai[i];
480           s[0] = b[i2];
481           s[1] = b[i2 + 1];
482           s[2] = b[i2 + 2];
483           s[3] = b[i2 + 3];
484           s[4] = b[i2 + 4];
485           s[5] = b[i2 + 5];
486           s[6] = b[i2 + 6];
487           while (nz--) {
488             idx   = 7 * (*vi++);
489             xw[0] = x[idx];
490             xw[1] = x[1 + idx];
491             xw[2] = x[2 + idx];
492             xw[3] = x[3 + idx];
493             xw[4] = x[4 + idx];
494             xw[5] = x[5 + idx];
495             xw[6] = x[6 + idx];
496             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
497             v += 49;
498           }
499           t[i2]     = s[0];
500           t[i2 + 1] = s[1];
501           t[i2 + 2] = s[2];
502           t[i2 + 3] = s[3];
503           t[i2 + 4] = s[4];
504           t[i2 + 5] = s[5];
505           t[i2 + 6] = s[6];
506           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
507           x[i2]     = xw[0];
508           x[i2 + 1] = xw[1];
509           x[i2 + 2] = xw[2];
510           x[i2 + 3] = xw[3];
511           x[i2 + 4] = xw[4];
512           x[i2 + 5] = xw[5];
513           x[i2 + 6] = xw[6];
514           idiag += 49;
515           i2 += 7;
516         }
517         break;
518       default:
519         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
520         PetscCall(PetscArraycpy(t, b, bs));
521         i2 = bs;
522         idiag += bs2;
523         for (i = 1; i < m; i++) {
524           v  = aa + bs2 * ai[i];
525           vi = aj + ai[i];
526           nz = diag[i] - ai[i];
527 
528           PetscCall(PetscArraycpy(w, b + i2, bs));
529           /* copy all rows of x that are needed into contiguous space */
530           workt = work;
531           for (j = 0; j < nz; j++) {
532             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
533             workt += bs;
534           }
535           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
536           PetscCall(PetscArraycpy(t + i2, w, bs));
537           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
538 
539           idiag += bs2;
540           i2 += bs;
541         }
542         break;
543       }
544       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
545       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
546       xb = t;
547     } else xb = b;
548     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
549       idiag = a->idiag + bs2 * (a->mbs - 1);
550       i2    = bs * (m - 1);
551       switch (bs) {
552       case 1:
553         s[0] = xb[i2];
554         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
555         x[i2] = xw[0];
556         i2 -= 1;
557         for (i = m - 2; i >= 0; i--) {
558           v    = aa + (diag[i] + 1);
559           vi   = aj + diag[i] + 1;
560           nz   = ai[i + 1] - diag[i] - 1;
561           s[0] = xb[i2];
562           for (j = 0; j < nz; j++) {
563             xw[0] = x[vi[j]];
564             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
565           }
566           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
567           x[i2] = xw[0];
568           idiag -= 1;
569           i2 -= 1;
570         }
571         break;
572       case 2:
573         s[0] = xb[i2];
574         s[1] = xb[i2 + 1];
575         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
576         x[i2]     = xw[0];
577         x[i2 + 1] = xw[1];
578         i2 -= 2;
579         idiag -= 4;
580         for (i = m - 2; i >= 0; i--) {
581           v    = aa + 4 * (diag[i] + 1);
582           vi   = aj + diag[i] + 1;
583           nz   = ai[i + 1] - diag[i] - 1;
584           s[0] = xb[i2];
585           s[1] = xb[i2 + 1];
586           for (j = 0; j < nz; j++) {
587             idx   = 2 * vi[j];
588             it    = 4 * j;
589             xw[0] = x[idx];
590             xw[1] = x[1 + idx];
591             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
592           }
593           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
594           x[i2]     = xw[0];
595           x[i2 + 1] = xw[1];
596           idiag -= 4;
597           i2 -= 2;
598         }
599         break;
600       case 3:
601         s[0] = xb[i2];
602         s[1] = xb[i2 + 1];
603         s[2] = xb[i2 + 2];
604         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
605         x[i2]     = xw[0];
606         x[i2 + 1] = xw[1];
607         x[i2 + 2] = xw[2];
608         i2 -= 3;
609         idiag -= 9;
610         for (i = m - 2; i >= 0; i--) {
611           v    = aa + 9 * (diag[i] + 1);
612           vi   = aj + diag[i] + 1;
613           nz   = ai[i + 1] - diag[i] - 1;
614           s[0] = xb[i2];
615           s[1] = xb[i2 + 1];
616           s[2] = xb[i2 + 2];
617           while (nz--) {
618             idx   = 3 * (*vi++);
619             xw[0] = x[idx];
620             xw[1] = x[1 + idx];
621             xw[2] = x[2 + idx];
622             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
623             v += 9;
624           }
625           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
626           x[i2]     = xw[0];
627           x[i2 + 1] = xw[1];
628           x[i2 + 2] = xw[2];
629           idiag -= 9;
630           i2 -= 3;
631         }
632         break;
633       case 4:
634         s[0] = xb[i2];
635         s[1] = xb[i2 + 1];
636         s[2] = xb[i2 + 2];
637         s[3] = xb[i2 + 3];
638         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
639         x[i2]     = xw[0];
640         x[i2 + 1] = xw[1];
641         x[i2 + 2] = xw[2];
642         x[i2 + 3] = xw[3];
643         i2 -= 4;
644         idiag -= 16;
645         for (i = m - 2; i >= 0; i--) {
646           v    = aa + 16 * (diag[i] + 1);
647           vi   = aj + diag[i] + 1;
648           nz   = ai[i + 1] - diag[i] - 1;
649           s[0] = xb[i2];
650           s[1] = xb[i2 + 1];
651           s[2] = xb[i2 + 2];
652           s[3] = xb[i2 + 3];
653           while (nz--) {
654             idx   = 4 * (*vi++);
655             xw[0] = x[idx];
656             xw[1] = x[1 + idx];
657             xw[2] = x[2 + idx];
658             xw[3] = x[3 + idx];
659             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
660             v += 16;
661           }
662           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
663           x[i2]     = xw[0];
664           x[i2 + 1] = xw[1];
665           x[i2 + 2] = xw[2];
666           x[i2 + 3] = xw[3];
667           idiag -= 16;
668           i2 -= 4;
669         }
670         break;
671       case 5:
672         s[0] = xb[i2];
673         s[1] = xb[i2 + 1];
674         s[2] = xb[i2 + 2];
675         s[3] = xb[i2 + 3];
676         s[4] = xb[i2 + 4];
677         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
678         x[i2]     = xw[0];
679         x[i2 + 1] = xw[1];
680         x[i2 + 2] = xw[2];
681         x[i2 + 3] = xw[3];
682         x[i2 + 4] = xw[4];
683         i2 -= 5;
684         idiag -= 25;
685         for (i = m - 2; i >= 0; i--) {
686           v    = aa + 25 * (diag[i] + 1);
687           vi   = aj + diag[i] + 1;
688           nz   = ai[i + 1] - diag[i] - 1;
689           s[0] = xb[i2];
690           s[1] = xb[i2 + 1];
691           s[2] = xb[i2 + 2];
692           s[3] = xb[i2 + 3];
693           s[4] = xb[i2 + 4];
694           while (nz--) {
695             idx   = 5 * (*vi++);
696             xw[0] = x[idx];
697             xw[1] = x[1 + idx];
698             xw[2] = x[2 + idx];
699             xw[3] = x[3 + idx];
700             xw[4] = x[4 + idx];
701             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
702             v += 25;
703           }
704           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
705           x[i2]     = xw[0];
706           x[i2 + 1] = xw[1];
707           x[i2 + 2] = xw[2];
708           x[i2 + 3] = xw[3];
709           x[i2 + 4] = xw[4];
710           idiag -= 25;
711           i2 -= 5;
712         }
713         break;
714       case 6:
715         s[0] = xb[i2];
716         s[1] = xb[i2 + 1];
717         s[2] = xb[i2 + 2];
718         s[3] = xb[i2 + 3];
719         s[4] = xb[i2 + 4];
720         s[5] = xb[i2 + 5];
721         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
722         x[i2]     = xw[0];
723         x[i2 + 1] = xw[1];
724         x[i2 + 2] = xw[2];
725         x[i2 + 3] = xw[3];
726         x[i2 + 4] = xw[4];
727         x[i2 + 5] = xw[5];
728         i2 -= 6;
729         idiag -= 36;
730         for (i = m - 2; i >= 0; i--) {
731           v    = aa + 36 * (diag[i] + 1);
732           vi   = aj + diag[i] + 1;
733           nz   = ai[i + 1] - diag[i] - 1;
734           s[0] = xb[i2];
735           s[1] = xb[i2 + 1];
736           s[2] = xb[i2 + 2];
737           s[3] = xb[i2 + 3];
738           s[4] = xb[i2 + 4];
739           s[5] = xb[i2 + 5];
740           while (nz--) {
741             idx   = 6 * (*vi++);
742             xw[0] = x[idx];
743             xw[1] = x[1 + idx];
744             xw[2] = x[2 + idx];
745             xw[3] = x[3 + idx];
746             xw[4] = x[4 + idx];
747             xw[5] = x[5 + idx];
748             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
749             v += 36;
750           }
751           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
752           x[i2]     = xw[0];
753           x[i2 + 1] = xw[1];
754           x[i2 + 2] = xw[2];
755           x[i2 + 3] = xw[3];
756           x[i2 + 4] = xw[4];
757           x[i2 + 5] = xw[5];
758           idiag -= 36;
759           i2 -= 6;
760         }
761         break;
762       case 7:
763         s[0] = xb[i2];
764         s[1] = xb[i2 + 1];
765         s[2] = xb[i2 + 2];
766         s[3] = xb[i2 + 3];
767         s[4] = xb[i2 + 4];
768         s[5] = xb[i2 + 5];
769         s[6] = xb[i2 + 6];
770         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
771         x[i2]     = xw[0];
772         x[i2 + 1] = xw[1];
773         x[i2 + 2] = xw[2];
774         x[i2 + 3] = xw[3];
775         x[i2 + 4] = xw[4];
776         x[i2 + 5] = xw[5];
777         x[i2 + 6] = xw[6];
778         i2 -= 7;
779         idiag -= 49;
780         for (i = m - 2; i >= 0; i--) {
781           v    = aa + 49 * (diag[i] + 1);
782           vi   = aj + diag[i] + 1;
783           nz   = ai[i + 1] - diag[i] - 1;
784           s[0] = xb[i2];
785           s[1] = xb[i2 + 1];
786           s[2] = xb[i2 + 2];
787           s[3] = xb[i2 + 3];
788           s[4] = xb[i2 + 4];
789           s[5] = xb[i2 + 5];
790           s[6] = xb[i2 + 6];
791           while (nz--) {
792             idx   = 7 * (*vi++);
793             xw[0] = x[idx];
794             xw[1] = x[1 + idx];
795             xw[2] = x[2 + idx];
796             xw[3] = x[3 + idx];
797             xw[4] = x[4 + idx];
798             xw[5] = x[5 + idx];
799             xw[6] = x[6 + idx];
800             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
801             v += 49;
802           }
803           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
804           x[i2]     = xw[0];
805           x[i2 + 1] = xw[1];
806           x[i2 + 2] = xw[2];
807           x[i2 + 3] = xw[3];
808           x[i2 + 4] = xw[4];
809           x[i2 + 5] = xw[5];
810           x[i2 + 6] = xw[6];
811           idiag -= 49;
812           i2 -= 7;
813         }
814         break;
815       default:
816         PetscCall(PetscArraycpy(w, xb + i2, bs));
817         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
818         i2 -= bs;
819         idiag -= bs2;
820         for (i = m - 2; i >= 0; i--) {
821           v  = aa + bs2 * (diag[i] + 1);
822           vi = aj + diag[i] + 1;
823           nz = ai[i + 1] - diag[i] - 1;
824 
825           PetscCall(PetscArraycpy(w, xb + i2, bs));
826           /* copy all rows of x that are needed into contiguous space */
827           workt = work;
828           for (j = 0; j < nz; j++) {
829             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
830             workt += bs;
831           }
832           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
833           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
834 
835           idiag -= bs2;
836           i2 -= bs;
837         }
838         break;
839       }
840       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
841     }
842     its--;
843   }
844   while (its--) {
845     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
846       idiag = a->idiag;
847       i2    = 0;
848       switch (bs) {
849       case 1:
850         for (i = 0; i < m; i++) {
851           v    = aa + ai[i];
852           vi   = aj + ai[i];
853           nz   = ai[i + 1] - ai[i];
854           s[0] = b[i2];
855           for (j = 0; j < nz; j++) {
856             xw[0] = x[vi[j]];
857             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
858           }
859           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
860           x[i2] += xw[0];
861           idiag += 1;
862           i2 += 1;
863         }
864         break;
865       case 2:
866         for (i = 0; i < m; i++) {
867           v    = aa + 4 * ai[i];
868           vi   = aj + ai[i];
869           nz   = ai[i + 1] - ai[i];
870           s[0] = b[i2];
871           s[1] = b[i2 + 1];
872           for (j = 0; j < nz; j++) {
873             idx   = 2 * vi[j];
874             it    = 4 * j;
875             xw[0] = x[idx];
876             xw[1] = x[1 + idx];
877             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
878           }
879           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
880           x[i2] += xw[0];
881           x[i2 + 1] += xw[1];
882           idiag += 4;
883           i2 += 2;
884         }
885         break;
886       case 3:
887         for (i = 0; i < m; i++) {
888           v    = aa + 9 * ai[i];
889           vi   = aj + ai[i];
890           nz   = ai[i + 1] - ai[i];
891           s[0] = b[i2];
892           s[1] = b[i2 + 1];
893           s[2] = b[i2 + 2];
894           while (nz--) {
895             idx   = 3 * (*vi++);
896             xw[0] = x[idx];
897             xw[1] = x[1 + idx];
898             xw[2] = x[2 + idx];
899             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
900             v += 9;
901           }
902           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
903           x[i2] += xw[0];
904           x[i2 + 1] += xw[1];
905           x[i2 + 2] += xw[2];
906           idiag += 9;
907           i2 += 3;
908         }
909         break;
910       case 4:
911         for (i = 0; i < m; i++) {
912           v    = aa + 16 * ai[i];
913           vi   = aj + ai[i];
914           nz   = ai[i + 1] - ai[i];
915           s[0] = b[i2];
916           s[1] = b[i2 + 1];
917           s[2] = b[i2 + 2];
918           s[3] = b[i2 + 3];
919           while (nz--) {
920             idx   = 4 * (*vi++);
921             xw[0] = x[idx];
922             xw[1] = x[1 + idx];
923             xw[2] = x[2 + idx];
924             xw[3] = x[3 + idx];
925             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
926             v += 16;
927           }
928           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
929           x[i2] += xw[0];
930           x[i2 + 1] += xw[1];
931           x[i2 + 2] += xw[2];
932           x[i2 + 3] += xw[3];
933           idiag += 16;
934           i2 += 4;
935         }
936         break;
937       case 5:
938         for (i = 0; i < m; i++) {
939           v    = aa + 25 * ai[i];
940           vi   = aj + ai[i];
941           nz   = ai[i + 1] - ai[i];
942           s[0] = b[i2];
943           s[1] = b[i2 + 1];
944           s[2] = b[i2 + 2];
945           s[3] = b[i2 + 3];
946           s[4] = b[i2 + 4];
947           while (nz--) {
948             idx   = 5 * (*vi++);
949             xw[0] = x[idx];
950             xw[1] = x[1 + idx];
951             xw[2] = x[2 + idx];
952             xw[3] = x[3 + idx];
953             xw[4] = x[4 + idx];
954             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
955             v += 25;
956           }
957           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
958           x[i2] += xw[0];
959           x[i2 + 1] += xw[1];
960           x[i2 + 2] += xw[2];
961           x[i2 + 3] += xw[3];
962           x[i2 + 4] += xw[4];
963           idiag += 25;
964           i2 += 5;
965         }
966         break;
967       case 6:
968         for (i = 0; i < m; i++) {
969           v    = aa + 36 * ai[i];
970           vi   = aj + ai[i];
971           nz   = ai[i + 1] - ai[i];
972           s[0] = b[i2];
973           s[1] = b[i2 + 1];
974           s[2] = b[i2 + 2];
975           s[3] = b[i2 + 3];
976           s[4] = b[i2 + 4];
977           s[5] = b[i2 + 5];
978           while (nz--) {
979             idx   = 6 * (*vi++);
980             xw[0] = x[idx];
981             xw[1] = x[1 + idx];
982             xw[2] = x[2 + idx];
983             xw[3] = x[3 + idx];
984             xw[4] = x[4 + idx];
985             xw[5] = x[5 + idx];
986             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
987             v += 36;
988           }
989           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
990           x[i2] += xw[0];
991           x[i2 + 1] += xw[1];
992           x[i2 + 2] += xw[2];
993           x[i2 + 3] += xw[3];
994           x[i2 + 4] += xw[4];
995           x[i2 + 5] += xw[5];
996           idiag += 36;
997           i2 += 6;
998         }
999         break;
1000       case 7:
1001         for (i = 0; i < m; i++) {
1002           v    = aa + 49 * ai[i];
1003           vi   = aj + ai[i];
1004           nz   = ai[i + 1] - ai[i];
1005           s[0] = b[i2];
1006           s[1] = b[i2 + 1];
1007           s[2] = b[i2 + 2];
1008           s[3] = b[i2 + 3];
1009           s[4] = b[i2 + 4];
1010           s[5] = b[i2 + 5];
1011           s[6] = b[i2 + 6];
1012           while (nz--) {
1013             idx   = 7 * (*vi++);
1014             xw[0] = x[idx];
1015             xw[1] = x[1 + idx];
1016             xw[2] = x[2 + idx];
1017             xw[3] = x[3 + idx];
1018             xw[4] = x[4 + idx];
1019             xw[5] = x[5 + idx];
1020             xw[6] = x[6 + idx];
1021             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1022             v += 49;
1023           }
1024           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1025           x[i2] += xw[0];
1026           x[i2 + 1] += xw[1];
1027           x[i2 + 2] += xw[2];
1028           x[i2 + 3] += xw[3];
1029           x[i2 + 4] += xw[4];
1030           x[i2 + 5] += xw[5];
1031           x[i2 + 6] += xw[6];
1032           idiag += 49;
1033           i2 += 7;
1034         }
1035         break;
1036       default:
1037         for (i = 0; i < m; i++) {
1038           v  = aa + bs2 * ai[i];
1039           vi = aj + ai[i];
1040           nz = ai[i + 1] - ai[i];
1041 
1042           PetscCall(PetscArraycpy(w, b + i2, bs));
1043           /* copy all rows of x that are needed into contiguous space */
1044           workt = work;
1045           for (j = 0; j < nz; j++) {
1046             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1047             workt += bs;
1048           }
1049           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1050           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1051 
1052           idiag += bs2;
1053           i2 += bs;
1054         }
1055         break;
1056       }
1057       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1058     }
1059     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1060       idiag = a->idiag + bs2 * (a->mbs - 1);
1061       i2    = bs * (m - 1);
1062       switch (bs) {
1063       case 1:
1064         for (i = m - 1; i >= 0; i--) {
1065           v    = aa + ai[i];
1066           vi   = aj + ai[i];
1067           nz   = ai[i + 1] - ai[i];
1068           s[0] = b[i2];
1069           for (j = 0; j < nz; j++) {
1070             xw[0] = x[vi[j]];
1071             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1072           }
1073           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1074           x[i2] += xw[0];
1075           idiag -= 1;
1076           i2 -= 1;
1077         }
1078         break;
1079       case 2:
1080         for (i = m - 1; i >= 0; i--) {
1081           v    = aa + 4 * ai[i];
1082           vi   = aj + ai[i];
1083           nz   = ai[i + 1] - ai[i];
1084           s[0] = b[i2];
1085           s[1] = b[i2 + 1];
1086           for (j = 0; j < nz; j++) {
1087             idx   = 2 * vi[j];
1088             it    = 4 * j;
1089             xw[0] = x[idx];
1090             xw[1] = x[1 + idx];
1091             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1092           }
1093           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
1094           x[i2] += xw[0];
1095           x[i2 + 1] += xw[1];
1096           idiag -= 4;
1097           i2 -= 2;
1098         }
1099         break;
1100       case 3:
1101         for (i = m - 1; i >= 0; i--) {
1102           v    = aa + 9 * ai[i];
1103           vi   = aj + ai[i];
1104           nz   = ai[i + 1] - ai[i];
1105           s[0] = b[i2];
1106           s[1] = b[i2 + 1];
1107           s[2] = b[i2 + 2];
1108           while (nz--) {
1109             idx   = 3 * (*vi++);
1110             xw[0] = x[idx];
1111             xw[1] = x[1 + idx];
1112             xw[2] = x[2 + idx];
1113             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1114             v += 9;
1115           }
1116           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
1117           x[i2] += xw[0];
1118           x[i2 + 1] += xw[1];
1119           x[i2 + 2] += xw[2];
1120           idiag -= 9;
1121           i2 -= 3;
1122         }
1123         break;
1124       case 4:
1125         for (i = m - 1; i >= 0; i--) {
1126           v    = aa + 16 * ai[i];
1127           vi   = aj + ai[i];
1128           nz   = ai[i + 1] - ai[i];
1129           s[0] = b[i2];
1130           s[1] = b[i2 + 1];
1131           s[2] = b[i2 + 2];
1132           s[3] = b[i2 + 3];
1133           while (nz--) {
1134             idx   = 4 * (*vi++);
1135             xw[0] = x[idx];
1136             xw[1] = x[1 + idx];
1137             xw[2] = x[2 + idx];
1138             xw[3] = x[3 + idx];
1139             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1140             v += 16;
1141           }
1142           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
1143           x[i2] += xw[0];
1144           x[i2 + 1] += xw[1];
1145           x[i2 + 2] += xw[2];
1146           x[i2 + 3] += xw[3];
1147           idiag -= 16;
1148           i2 -= 4;
1149         }
1150         break;
1151       case 5:
1152         for (i = m - 1; i >= 0; i--) {
1153           v    = aa + 25 * ai[i];
1154           vi   = aj + ai[i];
1155           nz   = ai[i + 1] - ai[i];
1156           s[0] = b[i2];
1157           s[1] = b[i2 + 1];
1158           s[2] = b[i2 + 2];
1159           s[3] = b[i2 + 3];
1160           s[4] = b[i2 + 4];
1161           while (nz--) {
1162             idx   = 5 * (*vi++);
1163             xw[0] = x[idx];
1164             xw[1] = x[1 + idx];
1165             xw[2] = x[2 + idx];
1166             xw[3] = x[3 + idx];
1167             xw[4] = x[4 + idx];
1168             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1169             v += 25;
1170           }
1171           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
1172           x[i2] += xw[0];
1173           x[i2 + 1] += xw[1];
1174           x[i2 + 2] += xw[2];
1175           x[i2 + 3] += xw[3];
1176           x[i2 + 4] += xw[4];
1177           idiag -= 25;
1178           i2 -= 5;
1179         }
1180         break;
1181       case 6:
1182         for (i = m - 1; i >= 0; i--) {
1183           v    = aa + 36 * ai[i];
1184           vi   = aj + ai[i];
1185           nz   = ai[i + 1] - ai[i];
1186           s[0] = b[i2];
1187           s[1] = b[i2 + 1];
1188           s[2] = b[i2 + 2];
1189           s[3] = b[i2 + 3];
1190           s[4] = b[i2 + 4];
1191           s[5] = b[i2 + 5];
1192           while (nz--) {
1193             idx   = 6 * (*vi++);
1194             xw[0] = x[idx];
1195             xw[1] = x[1 + idx];
1196             xw[2] = x[2 + idx];
1197             xw[3] = x[3 + idx];
1198             xw[4] = x[4 + idx];
1199             xw[5] = x[5 + idx];
1200             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1201             v += 36;
1202           }
1203           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
1204           x[i2] += xw[0];
1205           x[i2 + 1] += xw[1];
1206           x[i2 + 2] += xw[2];
1207           x[i2 + 3] += xw[3];
1208           x[i2 + 4] += xw[4];
1209           x[i2 + 5] += xw[5];
1210           idiag -= 36;
1211           i2 -= 6;
1212         }
1213         break;
1214       case 7:
1215         for (i = m - 1; i >= 0; i--) {
1216           v    = aa + 49 * ai[i];
1217           vi   = aj + ai[i];
1218           nz   = ai[i + 1] - ai[i];
1219           s[0] = b[i2];
1220           s[1] = b[i2 + 1];
1221           s[2] = b[i2 + 2];
1222           s[3] = b[i2 + 3];
1223           s[4] = b[i2 + 4];
1224           s[5] = b[i2 + 5];
1225           s[6] = b[i2 + 6];
1226           while (nz--) {
1227             idx   = 7 * (*vi++);
1228             xw[0] = x[idx];
1229             xw[1] = x[1 + idx];
1230             xw[2] = x[2 + idx];
1231             xw[3] = x[3 + idx];
1232             xw[4] = x[4 + idx];
1233             xw[5] = x[5 + idx];
1234             xw[6] = x[6 + idx];
1235             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1236             v += 49;
1237           }
1238           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1239           x[i2] += xw[0];
1240           x[i2 + 1] += xw[1];
1241           x[i2 + 2] += xw[2];
1242           x[i2 + 3] += xw[3];
1243           x[i2 + 4] += xw[4];
1244           x[i2 + 5] += xw[5];
1245           x[i2 + 6] += xw[6];
1246           idiag -= 49;
1247           i2 -= 7;
1248         }
1249         break;
1250       default:
1251         for (i = m - 1; i >= 0; i--) {
1252           v  = aa + bs2 * ai[i];
1253           vi = aj + ai[i];
1254           nz = ai[i + 1] - ai[i];
1255 
1256           PetscCall(PetscArraycpy(w, b + i2, bs));
1257           /* copy all rows of x that are needed into contiguous space */
1258           workt = work;
1259           for (j = 0; j < nz; j++) {
1260             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1261             workt += bs;
1262           }
1263           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1264           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1265 
1266           idiag -= bs2;
1267           i2 -= bs;
1268         }
1269         break;
1270       }
1271       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1272     }
1273   }
1274   PetscCall(VecRestoreArray(xx, &x));
1275   PetscCall(VecRestoreArrayRead(bb, &b));
1276   PetscFunctionReturn(PETSC_SUCCESS);
1277 }
1278 
1279 /*
1280     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1281 */
1282 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1283   #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1284 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1285   #define matsetvaluesblocked4_ matsetvaluesblocked4
1286 #endif
1287 
1288 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1289 {
1290   Mat                A = *AA;
1291   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1292   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1293   PetscInt          *ai = a->i, *ailen = a->ilen;
1294   PetscInt          *aj = a->j, stepval, lastcol = -1;
1295   const PetscScalar *value = v;
1296   MatScalar         *ap, *aa = a->a, *bap;
1297 
1298   PetscFunctionBegin;
1299   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1300   stepval = (n - 1) * 4;
1301   for (k = 0; k < m; k++) { /* loop over added rows */
1302     row  = im[k];
1303     rp   = aj + ai[row];
1304     ap   = aa + 16 * ai[row];
1305     nrow = ailen[row];
1306     low  = 0;
1307     high = nrow;
1308     for (l = 0; l < n; l++) { /* loop over added columns */
1309       col = in[l];
1310       if (col <= lastcol) low = 0;
1311       else high = nrow;
1312       lastcol = col;
1313       value   = v + k * (stepval + 4 + l) * 4;
1314       while (high - low > 7) {
1315         t = (low + high) / 2;
1316         if (rp[t] > col) high = t;
1317         else low = t;
1318       }
1319       for (i = low; i < high; i++) {
1320         if (rp[i] > col) break;
1321         if (rp[i] == col) {
1322           bap = ap + 16 * i;
1323           for (ii = 0; ii < 4; ii++, value += stepval) {
1324             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1325           }
1326           goto noinsert2;
1327         }
1328       }
1329       N = nrow++ - 1;
1330       high++; /* added new column index thus must search to one higher than before */
1331       /* shift up all the later entries in this row */
1332       for (ii = N; ii >= i; ii--) {
1333         rp[ii + 1] = rp[ii];
1334         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1335       }
1336       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1337       rp[i] = col;
1338       bap   = ap + 16 * i;
1339       for (ii = 0; ii < 4; ii++, value += stepval) {
1340         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1341       }
1342     noinsert2:;
1343       low = i;
1344     }
1345     ailen[row] = nrow;
1346   }
1347   PetscFunctionReturnVoid();
1348 }
1349 
1350 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1351   #define matsetvalues4_ MATSETVALUES4
1352 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1353   #define matsetvalues4_ matsetvalues4
1354 #endif
1355 
1356 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1357 {
1358   Mat          A = *AA;
1359   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1360   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1361   PetscInt    *ai = a->i, *ailen = a->ilen;
1362   PetscInt    *aj = a->j, brow, bcol;
1363   PetscInt     ridx, cidx, lastcol = -1;
1364   MatScalar   *ap, value, *aa      = a->a, *bap;
1365 
1366   PetscFunctionBegin;
1367   for (k = 0; k < m; k++) { /* loop over added rows */
1368     row  = im[k];
1369     brow = row / 4;
1370     rp   = aj + ai[brow];
1371     ap   = aa + 16 * ai[brow];
1372     nrow = ailen[brow];
1373     low  = 0;
1374     high = nrow;
1375     for (l = 0; l < n; l++) { /* loop over added columns */
1376       col   = in[l];
1377       bcol  = col / 4;
1378       ridx  = row % 4;
1379       cidx  = col % 4;
1380       value = v[l + k * n];
1381       if (col <= lastcol) low = 0;
1382       else high = nrow;
1383       lastcol = col;
1384       while (high - low > 7) {
1385         t = (low + high) / 2;
1386         if (rp[t] > bcol) high = t;
1387         else low = t;
1388       }
1389       for (i = low; i < high; i++) {
1390         if (rp[i] > bcol) break;
1391         if (rp[i] == bcol) {
1392           bap = ap + 16 * i + 4 * cidx + ridx;
1393           *bap += value;
1394           goto noinsert1;
1395         }
1396       }
1397       N = nrow++ - 1;
1398       high++; /* added new column thus must search to one higher than before */
1399       /* shift up all the later entries in this row */
1400       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
1401       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
1402       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1403       rp[i]                        = bcol;
1404       ap[16 * i + 4 * cidx + ridx] = value;
1405     noinsert1:;
1406       low = i;
1407     }
1408     ailen[brow] = nrow;
1409   }
1410   PetscFunctionReturnVoid();
1411 }
1412 
1413 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1414 {
1415   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1416   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
1417   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
1418 
1419   PetscFunctionBegin;
1420   *nn = n;
1421   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1422   if (symmetric) {
1423     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1424     nz = tia[n];
1425   } else {
1426     tia = a->i;
1427     tja = a->j;
1428   }
1429 
1430   if (!blockcompressed && bs > 1) {
1431     (*nn) *= bs;
1432     /* malloc & create the natural set of indices */
1433     PetscCall(PetscMalloc1((n + 1) * bs, ia));
1434     if (n) {
1435       (*ia)[0] = oshift;
1436       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
1437     }
1438 
1439     for (i = 1; i < n; i++) {
1440       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1441       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
1442     }
1443     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1444 
1445     if (inja) {
1446       PetscCall(PetscMalloc1(nz * bs * bs, ja));
1447       cnt = 0;
1448       for (i = 0; i < n; i++) {
1449         for (j = 0; j < bs; j++) {
1450           for (k = tia[i]; k < tia[i + 1]; k++) {
1451             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
1452           }
1453         }
1454       }
1455     }
1456 
1457     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1458       PetscCall(PetscFree(tia));
1459       PetscCall(PetscFree(tja));
1460     }
1461   } else if (oshift == 1) {
1462     if (symmetric) {
1463       nz = tia[A->rmap->n / bs];
1464       /*  add 1 to i and j indices */
1465       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1466       *ia = tia;
1467       if (ja) {
1468         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1469         *ja = tja;
1470       }
1471     } else {
1472       nz = a->i[A->rmap->n / bs];
1473       /* malloc space and  add 1 to i and j indices */
1474       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1475       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1476       if (ja) {
1477         PetscCall(PetscMalloc1(nz, ja));
1478         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1479       }
1480     }
1481   } else {
1482     *ia = tia;
1483     if (ja) *ja = tja;
1484   }
1485   PetscFunctionReturn(PETSC_SUCCESS);
1486 }
1487 
1488 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1489 {
1490   PetscFunctionBegin;
1491   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1492   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1493     PetscCall(PetscFree(*ia));
1494     if (ja) PetscCall(PetscFree(*ja));
1495   }
1496   PetscFunctionReturn(PETSC_SUCCESS);
1497 }
1498 
1499 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1500 {
1501   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1502 
1503   PetscFunctionBegin;
1504   if (A->hash_active) {
1505     PetscInt bs;
1506     A->ops[0] = a->cops;
1507     PetscCall(PetscHMapIJVDestroy(&a->ht));
1508     PetscCall(MatGetBlockSize(A, &bs));
1509     if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht));
1510     PetscCall(PetscFree(a->dnz));
1511     PetscCall(PetscFree(a->bdnz));
1512     A->hash_active = PETSC_FALSE;
1513   }
1514   PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
1515   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
1516   PetscCall(ISDestroy(&a->row));
1517   PetscCall(ISDestroy(&a->col));
1518   PetscCall(PetscFree(a->diag));
1519   PetscCall(PetscFree(a->idiag));
1520   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
1521   PetscCall(PetscFree(a->solve_work));
1522   PetscCall(PetscFree(a->mult_work));
1523   PetscCall(PetscFree(a->sor_workt));
1524   PetscCall(PetscFree(a->sor_work));
1525   PetscCall(ISDestroy(&a->icol));
1526   PetscCall(PetscFree(a->saved_values));
1527   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1528 
1529   PetscCall(MatDestroy(&a->sbaijMat));
1530   PetscCall(MatDestroy(&a->parent));
1531   PetscCall(PetscFree(A->data));
1532 
1533   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
1534   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
1535   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
1536   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
1537   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
1538   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
1539   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
1540   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
1541   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
1542   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
1543   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
1544   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
1545 #if defined(PETSC_HAVE_HYPRE)
1546   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
1547 #endif
1548   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
1549   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1550   PetscFunctionReturn(PETSC_SUCCESS);
1551 }
1552 
1553 static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1554 {
1555   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1556 
1557   PetscFunctionBegin;
1558   switch (op) {
1559   case MAT_ROW_ORIENTED:
1560     a->roworiented = flg;
1561     break;
1562   case MAT_KEEP_NONZERO_PATTERN:
1563     a->keepnonzeropattern = flg;
1564     break;
1565   case MAT_NEW_NONZERO_LOCATIONS:
1566     a->nonew = (flg ? 0 : 1);
1567     break;
1568   case MAT_NEW_NONZERO_LOCATION_ERR:
1569     a->nonew = (flg ? -1 : 0);
1570     break;
1571   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1572     a->nonew = (flg ? -2 : 0);
1573     break;
1574   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1575     a->nounused = (flg ? -1 : 0);
1576     break;
1577   default:
1578     break;
1579   }
1580   PetscFunctionReturn(PETSC_SUCCESS);
1581 }
1582 
1583 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1584 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1585 {
1586   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
1587   MatScalar   *aa_i;
1588   PetscScalar *v_i;
1589 
1590   PetscFunctionBegin;
1591   bs  = A->rmap->bs;
1592   bs2 = bs * bs;
1593   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1594 
1595   bn  = row / bs; /* Block number */
1596   bp  = row % bs; /* Block Position */
1597   M   = ai[bn + 1] - ai[bn];
1598   *nz = bs * M;
1599 
1600   if (v) {
1601     *v = NULL;
1602     if (*nz) {
1603       PetscCall(PetscMalloc1(*nz, v));
1604       for (i = 0; i < M; i++) { /* for each block in the block row */
1605         v_i  = *v + i * bs;
1606         aa_i = aa + bs2 * (ai[bn] + i);
1607         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
1608       }
1609     }
1610   }
1611 
1612   if (idx) {
1613     *idx = NULL;
1614     if (*nz) {
1615       PetscCall(PetscMalloc1(*nz, idx));
1616       for (i = 0; i < M; i++) { /* for each block in the block row */
1617         idx_i = *idx + i * bs;
1618         itmp  = bs * aj[ai[bn] + i];
1619         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
1620       }
1621     }
1622   }
1623   PetscFunctionReturn(PETSC_SUCCESS);
1624 }
1625 
1626 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1627 {
1628   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1629 
1630   PetscFunctionBegin;
1631   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
1632   PetscFunctionReturn(PETSC_SUCCESS);
1633 }
1634 
1635 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1636 {
1637   PetscFunctionBegin;
1638   if (idx) PetscCall(PetscFree(*idx));
1639   if (v) PetscCall(PetscFree(*v));
1640   PetscFunctionReturn(PETSC_SUCCESS);
1641 }
1642 
1643 static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1644 {
1645   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
1646   Mat          C;
1647   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
1648   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
1649   MatScalar   *ata, *aa = a->a;
1650 
1651   PetscFunctionBegin;
1652   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1653   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1654   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1655     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1656 
1657     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
1658     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
1659     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
1660     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
1661 
1662     at  = (Mat_SeqBAIJ *)C->data;
1663     ati = at->i;
1664     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1665   } else {
1666     C   = *B;
1667     at  = (Mat_SeqBAIJ *)C->data;
1668     ati = at->i;
1669   }
1670 
1671   atj = at->j;
1672   ata = at->a;
1673 
1674   /* Copy ati into atfill so we have locations of the next free space in atj */
1675   PetscCall(PetscArraycpy(atfill, ati, nbs));
1676 
1677   /* Walk through A row-wise and mark nonzero entries of A^T. */
1678   for (i = 0; i < mbs; i++) {
1679     anzj = ai[i + 1] - ai[i];
1680     for (j = 0; j < anzj; j++) {
1681       atj[atfill[*aj]] = i;
1682       for (kr = 0; kr < bs; kr++) {
1683         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
1684       }
1685       atfill[*aj++] += 1;
1686     }
1687   }
1688   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1689   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1690 
1691   /* Clean up temporary space and complete requests. */
1692   PetscCall(PetscFree(atfill));
1693 
1694   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1695     PetscCall(MatSetBlockSizes(C, A->cmap->bs, A->rmap->bs));
1696     *B = C;
1697   } else {
1698     PetscCall(MatHeaderMerge(A, &C));
1699   }
1700   PetscFunctionReturn(PETSC_SUCCESS);
1701 }
1702 
1703 static PetscErrorCode MatCompare_SeqBAIJ_Private(Mat A, Mat B, PetscReal tol, PetscBool *flg)
1704 {
1705   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
1706 
1707   PetscFunctionBegin;
1708   /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */
1709   if (A->rmap->N != B->rmap->N || A->cmap->n != B->cmap->n || A->rmap->bs != B->rmap->bs || a->nz != b->nz) {
1710     *flg = PETSC_FALSE;
1711     PetscFunctionReturn(PETSC_SUCCESS);
1712   }
1713 
1714   /* if the a->i are the same */
1715   PetscCall(PetscArraycmp(a->i, b->i, a->mbs + 1, flg));
1716   if (!*flg) PetscFunctionReturn(PETSC_SUCCESS);
1717 
1718   /* if a->j are the same */
1719   PetscCall(PetscArraycmp(a->j, b->j, a->nz, flg));
1720   if (!*flg) PetscFunctionReturn(PETSC_SUCCESS);
1721 
1722   if (tol == 0.0) PetscCall(PetscArraycmp(a->a, b->a, a->nz * A->rmap->bs * A->rmap->bs, flg)); /* if a->a are the same */
1723   else {
1724     *flg = PETSC_TRUE;
1725     for (PetscInt i = 0; (i < a->nz * A->rmap->bs * A->rmap->bs) && *flg; ++i)
1726       if (PetscAbsScalar(a->a[i] - b->a[i]) > tol) *flg = PETSC_FALSE;
1727   }
1728   PetscFunctionReturn(PETSC_SUCCESS);
1729 }
1730 
1731 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1732 {
1733   Mat Btrans;
1734 
1735   PetscFunctionBegin;
1736   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
1737   PetscCall(MatCompare_SeqBAIJ_Private(A, Btrans, tol, f));
1738   PetscCall(MatDestroy(&Btrans));
1739   PetscFunctionReturn(PETSC_SUCCESS);
1740 }
1741 
1742 static PetscErrorCode MatEqual_SeqBAIJ(Mat A, Mat B, PetscBool *flg)
1743 {
1744   PetscFunctionBegin;
1745   PetscCall(MatCompare_SeqBAIJ_Private(A, B, 0.0, flg));
1746   PetscFunctionReturn(PETSC_SUCCESS);
1747 }
1748 
1749 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1750 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1751 {
1752   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1753   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1754   PetscInt    *rowlens, *colidxs;
1755   PetscScalar *matvals;
1756 
1757   PetscFunctionBegin;
1758   PetscCall(PetscViewerSetUp(viewer));
1759 
1760   M  = mat->rmap->N;
1761   N  = mat->cmap->N;
1762   m  = mat->rmap->n;
1763   bs = mat->rmap->bs;
1764   nz = bs * bs * A->nz;
1765 
1766   /* write matrix header */
1767   header[0] = MAT_FILE_CLASSID;
1768   header[1] = M;
1769   header[2] = N;
1770   header[3] = nz;
1771   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1772 
1773   /* store row lengths */
1774   PetscCall(PetscMalloc1(m, &rowlens));
1775   for (cnt = 0, i = 0; i < A->mbs; i++)
1776     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
1777   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
1778   PetscCall(PetscFree(rowlens));
1779 
1780   /* store column indices  */
1781   PetscCall(PetscMalloc1(nz, &colidxs));
1782   for (cnt = 0, i = 0; i < A->mbs; i++)
1783     for (k = 0; k < bs; k++)
1784       for (j = A->i[i]; j < A->i[i + 1]; j++)
1785         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
1786   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1787   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
1788   PetscCall(PetscFree(colidxs));
1789 
1790   /* store nonzero values */
1791   PetscCall(PetscMalloc1(nz, &matvals));
1792   for (cnt = 0, i = 0; i < A->mbs; i++)
1793     for (k = 0; k < bs; k++)
1794       for (j = A->i[i]; j < A->i[i + 1]; j++)
1795         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
1796   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1797   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
1798   PetscCall(PetscFree(matvals));
1799 
1800   /* write block size option to the viewer's .info file */
1801   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1802   PetscFunctionReturn(PETSC_SUCCESS);
1803 }
1804 
1805 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1806 {
1807   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1808   PetscInt     i, bs = A->rmap->bs, k;
1809 
1810   PetscFunctionBegin;
1811   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1812   for (i = 0; i < a->mbs; i++) {
1813     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
1814     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
1815     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1816   }
1817   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1818   PetscFunctionReturn(PETSC_SUCCESS);
1819 }
1820 
1821 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1822 {
1823   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1824   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1825   PetscViewerFormat format;
1826 
1827   PetscFunctionBegin;
1828   if (A->structure_only) {
1829     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
1830     PetscFunctionReturn(PETSC_SUCCESS);
1831   }
1832 
1833   PetscCall(PetscViewerGetFormat(viewer, &format));
1834   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1835     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1836   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1837     const char *matname;
1838     Mat         aij;
1839     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
1840     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
1841     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
1842     PetscCall(MatView(aij, viewer));
1843     PetscCall(MatDestroy(&aij));
1844   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1845     PetscFunctionReturn(PETSC_SUCCESS);
1846   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1847     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1848     for (i = 0; i < a->mbs; i++) {
1849       for (j = 0; j < bs; j++) {
1850         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1851         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1852           for (l = 0; l < bs; l++) {
1853 #if defined(PETSC_USE_COMPLEX)
1854             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1855               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1856             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1857               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1858             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1859               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1860             }
1861 #else
1862             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1863 #endif
1864           }
1865         }
1866         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1867       }
1868     }
1869     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1870   } else {
1871     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1872     for (i = 0; i < a->mbs; i++) {
1873       for (j = 0; j < bs; j++) {
1874         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1875         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1876           for (l = 0; l < bs; l++) {
1877 #if defined(PETSC_USE_COMPLEX)
1878             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
1879               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1880             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
1881               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1882             } else {
1883               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1884             }
1885 #else
1886             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1887 #endif
1888           }
1889         }
1890         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1891       }
1892     }
1893     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1894   }
1895   PetscCall(PetscViewerFlush(viewer));
1896   PetscFunctionReturn(PETSC_SUCCESS);
1897 }
1898 
1899 #include <petscdraw.h>
1900 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1901 {
1902   Mat               A = (Mat)Aa;
1903   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1904   PetscInt          row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
1905   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1906   MatScalar        *aa;
1907   PetscViewer       viewer;
1908   PetscViewerFormat format;
1909   int               color;
1910 
1911   PetscFunctionBegin;
1912   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1913   PetscCall(PetscViewerGetFormat(viewer, &format));
1914   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
1915 
1916   /* loop over matrix elements drawing boxes */
1917 
1918   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1919     PetscDrawCollectiveBegin(draw);
1920     /* Blue for negative, Cyan for zero and  Red for positive */
1921     color = PETSC_DRAW_BLUE;
1922     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1923       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1924         y_l = A->rmap->N - row - 1.0;
1925         y_r = y_l + 1.0;
1926         x_l = a->j[j] * bs;
1927         x_r = x_l + 1.0;
1928         aa  = a->a + j * bs2;
1929         for (k = 0; k < bs; k++) {
1930           for (l = 0; l < bs; l++) {
1931             if (PetscRealPart(*aa++) >= 0.) continue;
1932             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1933           }
1934         }
1935       }
1936     }
1937     color = PETSC_DRAW_CYAN;
1938     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1939       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1940         y_l = A->rmap->N - row - 1.0;
1941         y_r = y_l + 1.0;
1942         x_l = a->j[j] * bs;
1943         x_r = x_l + 1.0;
1944         aa  = a->a + j * bs2;
1945         for (k = 0; k < bs; k++) {
1946           for (l = 0; l < bs; l++) {
1947             if (PetscRealPart(*aa++) != 0.) continue;
1948             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1949           }
1950         }
1951       }
1952     }
1953     color = PETSC_DRAW_RED;
1954     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1955       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1956         y_l = A->rmap->N - row - 1.0;
1957         y_r = y_l + 1.0;
1958         x_l = a->j[j] * bs;
1959         x_r = x_l + 1.0;
1960         aa  = a->a + j * bs2;
1961         for (k = 0; k < bs; k++) {
1962           for (l = 0; l < bs; l++) {
1963             if (PetscRealPart(*aa++) <= 0.) continue;
1964             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1965           }
1966         }
1967       }
1968     }
1969     PetscDrawCollectiveEnd(draw);
1970   } else {
1971     /* use contour shading to indicate magnitude of values */
1972     /* first determine max of all nonzero values */
1973     PetscReal minv = 0.0, maxv = 0.0;
1974     PetscDraw popup;
1975 
1976     for (i = 0; i < a->nz * a->bs2; i++) {
1977       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1978     }
1979     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1980     PetscCall(PetscDrawGetPopup(draw, &popup));
1981     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
1982 
1983     PetscDrawCollectiveBegin(draw);
1984     for (i = 0, row = 0; i < mbs; i++, row += bs) {
1985       for (j = a->i[i]; j < a->i[i + 1]; j++) {
1986         y_l = A->rmap->N - row - 1.0;
1987         y_r = y_l + 1.0;
1988         x_l = a->j[j] * bs;
1989         x_r = x_l + 1.0;
1990         aa  = a->a + j * bs2;
1991         for (k = 0; k < bs; k++) {
1992           for (l = 0; l < bs; l++) {
1993             MatScalar v = *aa++;
1994             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
1995             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1996           }
1997         }
1998       }
1999     }
2000     PetscDrawCollectiveEnd(draw);
2001   }
2002   PetscFunctionReturn(PETSC_SUCCESS);
2003 }
2004 
2005 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2006 {
2007   PetscReal xl, yl, xr, yr, w, h;
2008   PetscDraw draw;
2009   PetscBool isnull;
2010 
2011   PetscFunctionBegin;
2012   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
2013   PetscCall(PetscDrawIsNull(draw, &isnull));
2014   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
2015 
2016   xr = A->cmap->n;
2017   yr = A->rmap->N;
2018   h  = yr / 10.0;
2019   w  = xr / 10.0;
2020   xr += w;
2021   yr += h;
2022   xl = -w;
2023   yl = -h;
2024   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
2025   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
2026   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
2027   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
2028   PetscCall(PetscDrawSave(draw));
2029   PetscFunctionReturn(PETSC_SUCCESS);
2030 }
2031 
2032 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2033 {
2034   PetscBool isascii, isbinary, isdraw;
2035 
2036   PetscFunctionBegin;
2037   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
2038   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2039   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
2040   if (isascii) {
2041     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
2042   } else if (isbinary) {
2043     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
2044   } else if (isdraw) {
2045     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
2046   } else {
2047     Mat B;
2048     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2049     PetscCall(MatView(B, viewer));
2050     PetscCall(MatDestroy(&B));
2051   }
2052   PetscFunctionReturn(PETSC_SUCCESS);
2053 }
2054 
2055 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2056 {
2057   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2058   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2059   PetscInt    *ai = a->i, *ailen = a->ilen;
2060   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
2061   MatScalar   *ap, *aa = a->a;
2062 
2063   PetscFunctionBegin;
2064   for (k = 0; k < m; k++) { /* loop over rows */
2065     row  = im[k];
2066     brow = row / bs;
2067     if (row < 0) {
2068       v += n;
2069       continue;
2070     } /* negative row */
2071     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2072     rp   = PetscSafePointerPlusOffset(aj, ai[brow]);
2073     ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2074     nrow = ailen[brow];
2075     for (l = 0; l < n; l++) { /* loop over columns */
2076       if (in[l] < 0) {
2077         v++;
2078         continue;
2079       } /* negative column */
2080       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
2081       col  = in[l];
2082       bcol = col / bs;
2083       cidx = col % bs;
2084       ridx = row % bs;
2085       high = nrow;
2086       low  = 0; /* assume unsorted */
2087       while (high - low > 5) {
2088         t = (low + high) / 2;
2089         if (rp[t] > bcol) high = t;
2090         else low = t;
2091       }
2092       for (i = low; i < high; i++) {
2093         if (rp[i] > bcol) break;
2094         if (rp[i] == bcol) {
2095           *v++ = ap[bs2 * i + bs * cidx + ridx];
2096           goto finished;
2097         }
2098       }
2099       *v++ = 0.0;
2100     finished:;
2101     }
2102   }
2103   PetscFunctionReturn(PETSC_SUCCESS);
2104 }
2105 
2106 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2107 {
2108   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2109   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2110   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2111   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2112   PetscBool          roworiented = a->roworiented;
2113   const PetscScalar *value       = v;
2114   MatScalar         *ap = NULL, *aa = a->a, *bap;
2115 
2116   PetscFunctionBegin;
2117   if (roworiented) {
2118     stepval = (n - 1) * bs;
2119   } else {
2120     stepval = (m - 1) * bs;
2121   }
2122   for (k = 0; k < m; k++) { /* loop over added rows */
2123     row = im[k];
2124     if (row < 0) continue;
2125     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
2126     rp = aj + ai[row];
2127     if (!A->structure_only) ap = aa + bs2 * ai[row];
2128     rmax = imax[row];
2129     nrow = ailen[row];
2130     low  = 0;
2131     high = nrow;
2132     for (l = 0; l < n; l++) { /* loop over added columns */
2133       if (in[l] < 0) continue;
2134       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
2135       col = in[l];
2136       if (!A->structure_only) {
2137         if (roworiented) {
2138           value = v + (k * (stepval + bs) + l) * bs;
2139         } else {
2140           value = v + (l * (stepval + bs) + k) * bs;
2141         }
2142       }
2143       if (col <= lastcol) low = 0;
2144       else high = nrow;
2145       lastcol = col;
2146       while (high - low > 7) {
2147         t = (low + high) / 2;
2148         if (rp[t] > col) high = t;
2149         else low = t;
2150       }
2151       for (i = low; i < high; i++) {
2152         if (rp[i] > col) break;
2153         if (rp[i] == col) {
2154           if (A->structure_only) goto noinsert2;
2155           bap = ap + bs2 * i;
2156           if (roworiented) {
2157             if (is == ADD_VALUES) {
2158               for (ii = 0; ii < bs; ii++, value += stepval) {
2159                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2160               }
2161             } else {
2162               for (ii = 0; ii < bs; ii++, value += stepval) {
2163                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2164               }
2165             }
2166           } else {
2167             if (is == ADD_VALUES) {
2168               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2169                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
2170                 bap += bs;
2171               }
2172             } else {
2173               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2174                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
2175                 bap += bs;
2176               }
2177             }
2178           }
2179           goto noinsert2;
2180         }
2181       }
2182       if (nonew == 1) goto noinsert2;
2183       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2184       if (A->structure_only) {
2185         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
2186       } else {
2187         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2188       }
2189       N = nrow++ - 1;
2190       high++;
2191       /* shift up all the later entries in this row */
2192       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2193       rp[i] = col;
2194       if (!A->structure_only) {
2195         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2196         bap = ap + bs2 * i;
2197         if (roworiented) {
2198           for (ii = 0; ii < bs; ii++, value += stepval) {
2199             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2200           }
2201         } else {
2202           for (ii = 0; ii < bs; ii++, value += stepval) {
2203             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2204           }
2205         }
2206       }
2207     noinsert2:;
2208       low = i;
2209     }
2210     ailen[row] = nrow;
2211   }
2212   PetscFunctionReturn(PETSC_SUCCESS);
2213 }
2214 
2215 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2216 {
2217   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2218   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2219   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2220   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2221   MatScalar   *aa    = a->a, *ap;
2222   PetscReal    ratio = 0.6;
2223 
2224   PetscFunctionBegin;
2225   if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS);
2226 
2227   if (m) rmax = ailen[0];
2228   for (i = 1; i < mbs; i++) {
2229     /* move each row back by the amount of empty slots (fshift) before it*/
2230     fshift += imax[i - 1] - ailen[i - 1];
2231     rmax = PetscMax(rmax, ailen[i]);
2232     if (fshift) {
2233       ip = aj + ai[i];
2234       ap = aa + bs2 * ai[i];
2235       N  = ailen[i];
2236       PetscCall(PetscArraymove(ip - fshift, ip, N));
2237       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2238     }
2239     ai[i] = ai[i - 1] + ailen[i - 1];
2240   }
2241   if (mbs) {
2242     fshift += imax[mbs - 1] - ailen[mbs - 1];
2243     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2244   }
2245 
2246   /* reset ilen and imax for each row */
2247   a->nonzerorowcnt = 0;
2248   if (A->structure_only) {
2249     PetscCall(PetscFree2(a->imax, a->ilen));
2250   } else { /* !A->structure_only */
2251     for (i = 0; i < mbs; i++) {
2252       ailen[i] = imax[i] = ai[i + 1] - ai[i];
2253       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2254     }
2255   }
2256   a->nz = ai[mbs];
2257 
2258   /* diagonals may have moved, so kill the diagonal pointers */
2259   a->idiagvalid = PETSC_FALSE;
2260   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
2261   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
2262   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
2263   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
2264 
2265   A->info.mallocs += a->reallocs;
2266   a->reallocs         = 0;
2267   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2268   a->rmax             = rmax;
2269 
2270   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
2271   PetscFunctionReturn(PETSC_SUCCESS);
2272 }
2273 
2274 /*
2275    This function returns an array of flags which indicate the locations of contiguous
2276    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2277    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2278    Assume: sizes should be long enough to hold all the values.
2279 */
2280 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2281 {
2282   PetscInt j = 0;
2283 
2284   PetscFunctionBegin;
2285   for (PetscInt i = 0; i < n; j++) {
2286     PetscInt row = idx[i];
2287     if (row % bs != 0) { /* Not the beginning of a block */
2288       sizes[j] = 1;
2289       i++;
2290     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2291       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2292       i++;
2293     } else { /* Beginning of the block, so check if the complete block exists */
2294       PetscBool flg = PETSC_TRUE;
2295       for (PetscInt k = 1; k < bs; k++) {
2296         if (row + k != idx[i + k]) { /* break in the block */
2297           flg = PETSC_FALSE;
2298           break;
2299         }
2300       }
2301       if (flg) { /* No break in the bs */
2302         sizes[j] = bs;
2303         i += bs;
2304       } else {
2305         sizes[j] = 1;
2306         i++;
2307       }
2308     }
2309   }
2310   *bs_max = j;
2311   PetscFunctionReturn(PETSC_SUCCESS);
2312 }
2313 
2314 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2315 {
2316   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2317   PetscInt           i, j, k, count, *rows;
2318   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
2319   PetscScalar        zero = 0.0;
2320   MatScalar         *aa;
2321   const PetscScalar *xx;
2322   PetscScalar       *bb;
2323 
2324   PetscFunctionBegin;
2325   /* fix right-hand side if needed */
2326   if (x && b) {
2327     PetscCall(VecGetArrayRead(x, &xx));
2328     PetscCall(VecGetArray(b, &bb));
2329     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
2330     PetscCall(VecRestoreArrayRead(x, &xx));
2331     PetscCall(VecRestoreArray(b, &bb));
2332   }
2333 
2334   /* Make a copy of the IS and  sort it */
2335   /* allocate memory for rows,sizes */
2336   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2337 
2338   /* copy IS values to rows, and sort them */
2339   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
2340   PetscCall(PetscSortInt(is_n, rows));
2341 
2342   if (baij->keepnonzeropattern) {
2343     for (i = 0; i < is_n; i++) sizes[i] = 1;
2344     bs_max = is_n;
2345   } else {
2346     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2347     A->nonzerostate++;
2348   }
2349 
2350   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2351     row = rows[j];
2352     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2353     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2354     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2355     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2356       if (diag != (PetscScalar)0.0) {
2357         if (baij->ilen[row / bs] > 0) {
2358           baij->ilen[row / bs]       = 1;
2359           baij->j[baij->i[row / bs]] = row / bs;
2360 
2361           PetscCall(PetscArrayzero(aa, count * bs));
2362         }
2363         /* Now insert all the diagonal values for this bs */
2364         for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES);
2365       } else { /* (diag == 0.0) */
2366         baij->ilen[row / bs] = 0;
2367       } /* end (diag == 0.0) */
2368     } else { /* (sizes[i] != bs) */
2369       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2370       for (k = 0; k < count; k++) {
2371         aa[0] = zero;
2372         aa += bs;
2373       }
2374       if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES);
2375     }
2376   }
2377 
2378   PetscCall(PetscFree2(rows, sizes));
2379   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2380   PetscFunctionReturn(PETSC_SUCCESS);
2381 }
2382 
2383 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2384 {
2385   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2386   PetscInt           i, j, k, count;
2387   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
2388   PetscScalar        zero = 0.0;
2389   MatScalar         *aa;
2390   const PetscScalar *xx;
2391   PetscScalar       *bb;
2392   PetscBool         *zeroed, vecs = PETSC_FALSE;
2393 
2394   PetscFunctionBegin;
2395   /* fix right-hand side if needed */
2396   if (x && b) {
2397     PetscCall(VecGetArrayRead(x, &xx));
2398     PetscCall(VecGetArray(b, &bb));
2399     vecs = PETSC_TRUE;
2400   }
2401 
2402   /* zero the columns */
2403   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
2404   for (i = 0; i < is_n; i++) {
2405     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
2406     zeroed[is_idx[i]] = PETSC_TRUE;
2407   }
2408   for (i = 0; i < A->rmap->N; i++) {
2409     if (!zeroed[i]) {
2410       row = i / bs;
2411       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
2412         for (k = 0; k < bs; k++) {
2413           col = bs * baij->j[j] + k;
2414           if (zeroed[col]) {
2415             aa = baij->a + j * bs2 + (i % bs) + bs * k;
2416             if (vecs) bb[i] -= aa[0] * xx[col];
2417             aa[0] = 0.0;
2418           }
2419         }
2420       }
2421     } else if (vecs) bb[i] = diag * xx[i];
2422   }
2423   PetscCall(PetscFree(zeroed));
2424   if (vecs) {
2425     PetscCall(VecRestoreArrayRead(x, &xx));
2426     PetscCall(VecRestoreArray(b, &bb));
2427   }
2428 
2429   /* zero the rows */
2430   for (i = 0; i < is_n; i++) {
2431     row   = is_idx[i];
2432     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2433     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2434     for (k = 0; k < count; k++) {
2435       aa[0] = zero;
2436       aa += bs;
2437     }
2438     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
2439   }
2440   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2441   PetscFunctionReturn(PETSC_SUCCESS);
2442 }
2443 
2444 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2445 {
2446   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2447   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2448   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2449   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2450   PetscInt     ridx, cidx, bs2                 = a->bs2;
2451   PetscBool    roworiented = a->roworiented;
2452   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
2453 
2454   PetscFunctionBegin;
2455   for (k = 0; k < m; k++) { /* loop over added rows */
2456     row  = im[k];
2457     brow = row / bs;
2458     if (row < 0) continue;
2459     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
2460     rp = PetscSafePointerPlusOffset(aj, ai[brow]);
2461     if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2462     rmax = imax[brow];
2463     nrow = ailen[brow];
2464     low  = 0;
2465     high = nrow;
2466     for (l = 0; l < n; l++) { /* loop over added columns */
2467       if (in[l] < 0) continue;
2468       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
2469       col  = in[l];
2470       bcol = col / bs;
2471       ridx = row % bs;
2472       cidx = col % bs;
2473       if (!A->structure_only) {
2474         if (roworiented) {
2475           value = v[l + k * n];
2476         } else {
2477           value = v[k + l * m];
2478         }
2479       }
2480       if (col <= lastcol) low = 0;
2481       else high = nrow;
2482       lastcol = col;
2483       while (high - low > 7) {
2484         t = (low + high) / 2;
2485         if (rp[t] > bcol) high = t;
2486         else low = t;
2487       }
2488       for (i = low; i < high; i++) {
2489         if (rp[i] > bcol) break;
2490         if (rp[i] == bcol) {
2491           bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx);
2492           if (!A->structure_only) {
2493             if (is == ADD_VALUES) *bap += value;
2494             else *bap = value;
2495           }
2496           goto noinsert1;
2497         }
2498       }
2499       if (nonew == 1) goto noinsert1;
2500       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2501       if (A->structure_only) {
2502         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2503       } else {
2504         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2505       }
2506       N = nrow++ - 1;
2507       high++;
2508       /* shift up all the later entries in this row */
2509       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2510       rp[i] = bcol;
2511       if (!A->structure_only) {
2512         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2513         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2514         ap[bs2 * i + bs * cidx + ridx] = value;
2515       }
2516       a->nz++;
2517     noinsert1:;
2518       low = i;
2519     }
2520     ailen[brow] = nrow;
2521   }
2522   PetscFunctionReturn(PETSC_SUCCESS);
2523 }
2524 
2525 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2526 {
2527   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
2528   Mat          outA;
2529   PetscBool    row_identity, col_identity;
2530 
2531   PetscFunctionBegin;
2532   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
2533   PetscCall(ISIdentity(row, &row_identity));
2534   PetscCall(ISIdentity(col, &col_identity));
2535   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
2536 
2537   outA            = inA;
2538   inA->factortype = MAT_FACTOR_LU;
2539   PetscCall(PetscFree(inA->solvertype));
2540   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
2541 
2542   PetscCall(PetscObjectReference((PetscObject)row));
2543   PetscCall(ISDestroy(&a->row));
2544   a->row = row;
2545   PetscCall(PetscObjectReference((PetscObject)col));
2546   PetscCall(ISDestroy(&a->col));
2547   a->col = col;
2548 
2549   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2550   PetscCall(ISDestroy(&a->icol));
2551   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2552 
2553   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2554   if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
2555   PetscCall(MatLUFactorNumeric(outA, inA, info));
2556   PetscFunctionReturn(PETSC_SUCCESS);
2557 }
2558 
2559 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2560 {
2561   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2562 
2563   PetscFunctionBegin;
2564   baij->nz = baij->maxnz;
2565   PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2566   PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
2567   PetscFunctionReturn(PETSC_SUCCESS);
2568 }
2569 
2570 /*@
2571   MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix.
2572 
2573   Input Parameters:
2574 + mat     - the `MATSEQBAIJ` matrix
2575 - indices - the block column indices
2576 
2577   Level: advanced
2578 
2579   Notes:
2580   This can be called if you have precomputed the nonzero structure of the
2581   matrix and want to provide it to the matrix object to improve the performance
2582   of the `MatSetValues()` operation.
2583 
2584   You MUST have set the correct numbers of nonzeros per row in the call to
2585   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
2586 
2587   MUST be called before any calls to `MatSetValues()`
2588 
2589 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()`
2590 @*/
2591 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2592 {
2593   PetscFunctionBegin;
2594   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2595   PetscAssertPointer(indices, 2);
2596   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices));
2597   PetscFunctionReturn(PETSC_SUCCESS);
2598 }
2599 
2600 static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2601 {
2602   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2603   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2604   PetscReal    atmp;
2605   PetscScalar *x, zero = 0.0;
2606   MatScalar   *aa;
2607   PetscInt     ncols, brow, krow, kcol;
2608 
2609   PetscFunctionBegin;
2610   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2611   bs  = A->rmap->bs;
2612   aa  = a->a;
2613   ai  = a->i;
2614   aj  = a->j;
2615   mbs = a->mbs;
2616 
2617   PetscCall(VecSet(v, zero));
2618   PetscCall(VecGetArray(v, &x));
2619   PetscCall(VecGetLocalSize(v, &n));
2620   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2621   for (i = 0; i < mbs; i++) {
2622     ncols = ai[1] - ai[0];
2623     ai++;
2624     brow = bs * i;
2625     for (j = 0; j < ncols; j++) {
2626       for (kcol = 0; kcol < bs; kcol++) {
2627         for (krow = 0; krow < bs; krow++) {
2628           atmp = PetscAbsScalar(*aa);
2629           aa++;
2630           row = brow + krow; /* row index */
2631           if (PetscAbsScalar(x[row]) < atmp) {
2632             x[row] = atmp;
2633             if (idx) idx[row] = bs * (*aj) + kcol;
2634           }
2635         }
2636       }
2637       aj++;
2638     }
2639   }
2640   PetscCall(VecRestoreArray(v, &x));
2641   PetscFunctionReturn(PETSC_SUCCESS);
2642 }
2643 
2644 static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v)
2645 {
2646   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2647   PetscInt     i, j, n, row, bs, *ai, mbs;
2648   PetscReal    atmp;
2649   PetscScalar *x, zero = 0.0;
2650   MatScalar   *aa;
2651   PetscInt     ncols, brow, krow, kcol;
2652 
2653   PetscFunctionBegin;
2654   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2655   bs  = A->rmap->bs;
2656   aa  = a->a;
2657   ai  = a->i;
2658   mbs = a->mbs;
2659 
2660   PetscCall(VecSet(v, zero));
2661   PetscCall(VecGetArrayWrite(v, &x));
2662   PetscCall(VecGetLocalSize(v, &n));
2663   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2664   for (i = 0; i < mbs; i++) {
2665     ncols = ai[1] - ai[0];
2666     ai++;
2667     brow = bs * i;
2668     for (j = 0; j < ncols; j++) {
2669       for (kcol = 0; kcol < bs; kcol++) {
2670         for (krow = 0; krow < bs; krow++) {
2671           atmp = PetscAbsScalar(*aa);
2672           aa++;
2673           row = brow + krow; /* row index */
2674           x[row] += atmp;
2675         }
2676       }
2677     }
2678   }
2679   PetscCall(VecRestoreArrayWrite(v, &x));
2680   PetscFunctionReturn(PETSC_SUCCESS);
2681 }
2682 
2683 static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2684 {
2685   PetscFunctionBegin;
2686   /* If the two matrices have the same copy implementation, use fast copy. */
2687   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2688     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
2689     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2690     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
2691 
2692     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
2693     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
2694     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
2695     PetscCall(PetscObjectStateIncrease((PetscObject)B));
2696   } else {
2697     PetscCall(MatCopy_Basic(A, B, str));
2698   }
2699   PetscFunctionReturn(PETSC_SUCCESS);
2700 }
2701 
2702 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2703 {
2704   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2705 
2706   PetscFunctionBegin;
2707   *array = a->a;
2708   PetscFunctionReturn(PETSC_SUCCESS);
2709 }
2710 
2711 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2712 {
2713   PetscFunctionBegin;
2714   *array = NULL;
2715   PetscFunctionReturn(PETSC_SUCCESS);
2716 }
2717 
2718 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2719 {
2720   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
2721   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
2722   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
2723 
2724   PetscFunctionBegin;
2725   /* Set the number of nonzeros in the new matrix */
2726   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
2727   PetscFunctionReturn(PETSC_SUCCESS);
2728 }
2729 
2730 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2731 {
2732   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
2733   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2734   PetscBLASInt one = 1;
2735 
2736   PetscFunctionBegin;
2737   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2738     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2739     if (e) {
2740       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2741       if (e) {
2742         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2743         if (e) str = SAME_NONZERO_PATTERN;
2744       }
2745     }
2746     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2747   }
2748   if (str == SAME_NONZERO_PATTERN) {
2749     PetscScalar  alpha = a;
2750     PetscBLASInt bnz;
2751     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2752     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
2753     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2754   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2755     PetscCall(MatAXPY_Basic(Y, a, X, str));
2756   } else {
2757     Mat       B;
2758     PetscInt *nnz;
2759     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
2760     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
2761     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2762     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2763     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
2764     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
2765     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
2766     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
2767     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
2768     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2769     PetscCall(MatHeaderMerge(Y, &B));
2770     PetscCall(PetscFree(nnz));
2771   }
2772   PetscFunctionReturn(PETSC_SUCCESS);
2773 }
2774 
2775 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2776 {
2777 #if PetscDefined(USE_COMPLEX)
2778   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2779   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2780   MatScalar   *aa = a->a;
2781 
2782   PetscFunctionBegin;
2783   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
2784   PetscFunctionReturn(PETSC_SUCCESS);
2785 #else
2786   (void)A;
2787   return PETSC_SUCCESS;
2788 #endif
2789 }
2790 
2791 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2792 {
2793 #if PetscDefined(USE_COMPLEX)
2794   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2795   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2796   MatScalar   *aa = a->a;
2797 
2798   PetscFunctionBegin;
2799   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
2800   PetscFunctionReturn(PETSC_SUCCESS);
2801 #else
2802   (void)A;
2803   return PETSC_SUCCESS;
2804 #endif
2805 }
2806 
2807 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2808 {
2809 #if PetscDefined(USE_COMPLEX)
2810   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2811   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2812   MatScalar   *aa = a->a;
2813 
2814   PetscFunctionBegin;
2815   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2816   PetscFunctionReturn(PETSC_SUCCESS);
2817 #else
2818   (void)A;
2819   return PETSC_SUCCESS;
2820 #endif
2821 }
2822 
2823 /*
2824     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2825 */
2826 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2827 {
2828   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
2829   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
2830   PetscInt     nz = a->i[m], row, *jj, mr, col;
2831 
2832   PetscFunctionBegin;
2833   *nn = n;
2834   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2835   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
2836   PetscCall(PetscCalloc1(n, &collengths));
2837   PetscCall(PetscMalloc1(n + 1, &cia));
2838   PetscCall(PetscMalloc1(nz, &cja));
2839   jj = a->j;
2840   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2841   cia[0] = oshift;
2842   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2843   PetscCall(PetscArrayzero(collengths, n));
2844   jj = a->j;
2845   for (row = 0; row < m; row++) {
2846     mr = a->i[row + 1] - a->i[row];
2847     for (i = 0; i < mr; i++) {
2848       col = *jj++;
2849 
2850       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2851     }
2852   }
2853   PetscCall(PetscFree(collengths));
2854   *ia = cia;
2855   *ja = cja;
2856   PetscFunctionReturn(PETSC_SUCCESS);
2857 }
2858 
2859 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2860 {
2861   PetscFunctionBegin;
2862   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2863   PetscCall(PetscFree(*ia));
2864   PetscCall(PetscFree(*ja));
2865   PetscFunctionReturn(PETSC_SUCCESS);
2866 }
2867 
2868 /*
2869  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2870  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2871  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2872  */
2873 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2874 {
2875   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2876   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2877   PetscInt     nz = a->i[m], row, *jj, mr, col;
2878   PetscInt    *cspidx;
2879 
2880   PetscFunctionBegin;
2881   *nn = n;
2882   if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2883 
2884   PetscCall(PetscCalloc1(n, &collengths));
2885   PetscCall(PetscMalloc1(n + 1, &cia));
2886   PetscCall(PetscMalloc1(nz, &cja));
2887   PetscCall(PetscMalloc1(nz, &cspidx));
2888   jj = a->j;
2889   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2890   cia[0] = oshift;
2891   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2892   PetscCall(PetscArrayzero(collengths, n));
2893   jj = a->j;
2894   for (row = 0; row < m; row++) {
2895     mr = a->i[row + 1] - a->i[row];
2896     for (i = 0; i < mr; i++) {
2897       col                                         = *jj++;
2898       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2899       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2900     }
2901   }
2902   PetscCall(PetscFree(collengths));
2903   *ia    = cia;
2904   *ja    = cja;
2905   *spidx = cspidx;
2906   PetscFunctionReturn(PETSC_SUCCESS);
2907 }
2908 
2909 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2910 {
2911   PetscFunctionBegin;
2912   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
2913   PetscCall(PetscFree(*spidx));
2914   PetscFunctionReturn(PETSC_SUCCESS);
2915 }
2916 
2917 static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2918 {
2919   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
2920 
2921   PetscFunctionBegin;
2922   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
2923   PetscCall(MatShift_Basic(Y, a));
2924   PetscFunctionReturn(PETSC_SUCCESS);
2925 }
2926 
2927 PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep)
2928 {
2929   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2930   PetscInt     fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k;
2931   PetscInt     m = A->rmap->N, *ailen = a->ilen;
2932   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2933   MatScalar   *aa = a->a, *ap;
2934   PetscBool    zero;
2935 
2936   PetscFunctionBegin;
2937   PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix");
2938   if (m) rmax = ailen[0];
2939   for (i = 1; i <= mbs; i++) {
2940     for (k = ai[i - 1]; k < ai[i]; k++) {
2941       zero = PETSC_TRUE;
2942       ap   = aa + bs2 * k;
2943       for (j = 0; j < bs2 && zero; j++) {
2944         if (ap[j] != 0.0) zero = PETSC_FALSE;
2945       }
2946       if (zero && (aj[k] != i - 1 || !keep)) fshift++;
2947       else {
2948         if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1));
2949         aj[k - fshift] = aj[k];
2950         PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2));
2951       }
2952     }
2953     ai[i - 1] -= fshift_prev;
2954     fshift_prev  = fshift;
2955     ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1];
2956     a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0);
2957     rmax = PetscMax(rmax, ailen[i - 1]);
2958   }
2959   if (fshift) {
2960     if (mbs) {
2961       ai[mbs] -= fshift;
2962       a->nz = ai[mbs];
2963     }
2964     PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz));
2965     A->nonzerostate++;
2966     A->info.nz_unneeded += (PetscReal)fshift;
2967     a->rmax = rmax;
2968     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
2969     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
2970   }
2971   PetscFunctionReturn(PETSC_SUCCESS);
2972 }
2973 
2974 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2975                                        MatGetRow_SeqBAIJ,
2976                                        MatRestoreRow_SeqBAIJ,
2977                                        MatMult_SeqBAIJ_N,
2978                                        /* 4*/ MatMultAdd_SeqBAIJ_N,
2979                                        MatMultTranspose_SeqBAIJ,
2980                                        MatMultTransposeAdd_SeqBAIJ,
2981                                        NULL,
2982                                        NULL,
2983                                        NULL,
2984                                        /* 10*/ NULL,
2985                                        MatLUFactor_SeqBAIJ,
2986                                        NULL,
2987                                        NULL,
2988                                        MatTranspose_SeqBAIJ,
2989                                        /* 15*/ MatGetInfo_SeqBAIJ,
2990                                        MatEqual_SeqBAIJ,
2991                                        MatGetDiagonal_SeqBAIJ,
2992                                        MatDiagonalScale_SeqBAIJ,
2993                                        MatNorm_SeqBAIJ,
2994                                        /* 20*/ NULL,
2995                                        MatAssemblyEnd_SeqBAIJ,
2996                                        MatSetOption_SeqBAIJ,
2997                                        MatZeroEntries_SeqBAIJ,
2998                                        /* 24*/ MatZeroRows_SeqBAIJ,
2999                                        NULL,
3000                                        NULL,
3001                                        NULL,
3002                                        NULL,
3003                                        /* 29*/ MatSetUp_Seq_Hash,
3004                                        NULL,
3005                                        NULL,
3006                                        NULL,
3007                                        NULL,
3008                                        /* 34*/ MatDuplicate_SeqBAIJ,
3009                                        NULL,
3010                                        NULL,
3011                                        MatILUFactor_SeqBAIJ,
3012                                        NULL,
3013                                        /* 39*/ MatAXPY_SeqBAIJ,
3014                                        MatCreateSubMatrices_SeqBAIJ,
3015                                        MatIncreaseOverlap_SeqBAIJ,
3016                                        MatGetValues_SeqBAIJ,
3017                                        MatCopy_SeqBAIJ,
3018                                        /* 44*/ NULL,
3019                                        MatScale_SeqBAIJ,
3020                                        MatShift_SeqBAIJ,
3021                                        NULL,
3022                                        MatZeroRowsColumns_SeqBAIJ,
3023                                        /* 49*/ NULL,
3024                                        MatGetRowIJ_SeqBAIJ,
3025                                        MatRestoreRowIJ_SeqBAIJ,
3026                                        MatGetColumnIJ_SeqBAIJ,
3027                                        MatRestoreColumnIJ_SeqBAIJ,
3028                                        /* 54*/ MatFDColoringCreate_SeqXAIJ,
3029                                        NULL,
3030                                        NULL,
3031                                        NULL,
3032                                        MatSetValuesBlocked_SeqBAIJ,
3033                                        /* 59*/ MatCreateSubMatrix_SeqBAIJ,
3034                                        MatDestroy_SeqBAIJ,
3035                                        MatView_SeqBAIJ,
3036                                        NULL,
3037                                        NULL,
3038                                        /* 64*/ NULL,
3039                                        NULL,
3040                                        NULL,
3041                                        NULL,
3042                                        MatGetRowMaxAbs_SeqBAIJ,
3043                                        /* 69*/ NULL,
3044                                        MatConvert_Basic,
3045                                        NULL,
3046                                        MatFDColoringApply_BAIJ,
3047                                        NULL,
3048                                        /* 74*/ NULL,
3049                                        NULL,
3050                                        NULL,
3051                                        NULL,
3052                                        MatLoad_SeqBAIJ,
3053                                        /* 79*/ NULL,
3054                                        NULL,
3055                                        NULL,
3056                                        NULL,
3057                                        NULL,
3058                                        /* 84*/ NULL,
3059                                        NULL,
3060                                        NULL,
3061                                        NULL,
3062                                        NULL,
3063                                        /* 89*/ NULL,
3064                                        NULL,
3065                                        NULL,
3066                                        NULL,
3067                                        MatConjugate_SeqBAIJ,
3068                                        /* 94*/ NULL,
3069                                        NULL,
3070                                        MatRealPart_SeqBAIJ,
3071                                        MatImaginaryPart_SeqBAIJ,
3072                                        NULL,
3073                                        /* 99*/ NULL,
3074                                        NULL,
3075                                        NULL,
3076                                        NULL,
3077                                        NULL,
3078                                        /*104*/ NULL,
3079                                        NULL,
3080                                        NULL,
3081                                        NULL,
3082                                        NULL,
3083                                        /*109*/ NULL,
3084                                        NULL,
3085                                        MatMultHermitianTranspose_SeqBAIJ,
3086                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3087                                        NULL,
3088                                        /*114*/ NULL,
3089                                        MatGetColumnReductions_SeqBAIJ,
3090                                        MatInvertBlockDiagonal_SeqBAIJ,
3091                                        NULL,
3092                                        NULL,
3093                                        /*119*/ NULL,
3094                                        NULL,
3095                                        NULL,
3096                                        NULL,
3097                                        NULL,
3098                                        /*124*/ NULL,
3099                                        NULL,
3100                                        MatSetBlockSizes_Default,
3101                                        NULL,
3102                                        MatFDColoringSetUp_SeqXAIJ,
3103                                        /*129*/ NULL,
3104                                        MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3105                                        MatDestroySubMatrices_SeqBAIJ,
3106                                        NULL,
3107                                        NULL,
3108                                        /*134*/ NULL,
3109                                        NULL,
3110                                        MatEliminateZeros_SeqBAIJ,
3111                                        MatGetRowSumAbs_SeqBAIJ,
3112                                        NULL,
3113                                        /*139*/ NULL,
3114                                        NULL,
3115                                        MatCopyHashToXAIJ_Seq_Hash,
3116                                        NULL,
3117                                        NULL};
3118 
3119 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3120 {
3121   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3122   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3123 
3124   PetscFunctionBegin;
3125   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3126 
3127   /* allocate space for values if not already there */
3128   if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
3129 
3130   /* copy values over */
3131   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
3132   PetscFunctionReturn(PETSC_SUCCESS);
3133 }
3134 
3135 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3136 {
3137   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3138   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
3139 
3140   PetscFunctionBegin;
3141   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3142   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
3143 
3144   /* copy values over */
3145   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
3146   PetscFunctionReturn(PETSC_SUCCESS);
3147 }
3148 
3149 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3150 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3151 
3152 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3153 {
3154   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
3155   PetscInt     i, mbs, nbs, bs2;
3156   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3157 
3158   PetscFunctionBegin;
3159   if (B->hash_active) {
3160     PetscInt bs;
3161     B->ops[0] = b->cops;
3162     PetscCall(PetscHMapIJVDestroy(&b->ht));
3163     PetscCall(MatGetBlockSize(B, &bs));
3164     if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht));
3165     PetscCall(PetscFree(b->dnz));
3166     PetscCall(PetscFree(b->bdnz));
3167     B->hash_active = PETSC_FALSE;
3168   }
3169   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3170   if (nz == MAT_SKIP_ALLOCATION) {
3171     skipallocation = PETSC_TRUE;
3172     nz             = 0;
3173   }
3174 
3175   PetscCall(MatSetBlockSize(B, bs));
3176   PetscCall(PetscLayoutSetUp(B->rmap));
3177   PetscCall(PetscLayoutSetUp(B->cmap));
3178   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3179 
3180   B->preallocated = PETSC_TRUE;
3181 
3182   mbs = B->rmap->n / bs;
3183   nbs = B->cmap->n / bs;
3184   bs2 = bs * bs;
3185 
3186   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3187 
3188   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3189   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3190   if (nnz) {
3191     for (i = 0; i < mbs; i++) {
3192       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
3193       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3194     }
3195   }
3196 
3197   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
3198   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3199   PetscOptionsEnd();
3200 
3201   if (!flg) {
3202     switch (bs) {
3203     case 1:
3204       B->ops->mult    = MatMult_SeqBAIJ_1;
3205       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3206       break;
3207     case 2:
3208       B->ops->mult    = MatMult_SeqBAIJ_2;
3209       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3210       break;
3211     case 3:
3212       B->ops->mult    = MatMult_SeqBAIJ_3;
3213       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3214       break;
3215     case 4:
3216       B->ops->mult    = MatMult_SeqBAIJ_4;
3217       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3218       break;
3219     case 5:
3220       B->ops->mult    = MatMult_SeqBAIJ_5;
3221       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3222       break;
3223     case 6:
3224       B->ops->mult    = MatMult_SeqBAIJ_6;
3225       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3226       break;
3227     case 7:
3228       B->ops->mult    = MatMult_SeqBAIJ_7;
3229       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3230       break;
3231     case 9: {
3232       PetscInt version = 1;
3233       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3234       switch (version) {
3235 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3236       case 1:
3237         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
3238         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3239         PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3240         break;
3241 #endif
3242       default:
3243         B->ops->mult    = MatMult_SeqBAIJ_N;
3244         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3245         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3246         break;
3247       }
3248       break;
3249     }
3250     case 11:
3251       B->ops->mult    = MatMult_SeqBAIJ_11;
3252       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3253       break;
3254     case 12: {
3255       PetscInt version = 1;
3256       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3257       switch (version) {
3258       case 1:
3259         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
3260         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3261         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3262         break;
3263       case 2:
3264         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
3265         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3266         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3267         break;
3268 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3269       case 3:
3270         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
3271         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3272         PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3273         break;
3274 #endif
3275       default:
3276         B->ops->mult    = MatMult_SeqBAIJ_N;
3277         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3278         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3279         break;
3280       }
3281       break;
3282     }
3283     case 15: {
3284       PetscInt version = 1;
3285       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3286       switch (version) {
3287       case 1:
3288         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3289         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3290         break;
3291       case 2:
3292         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3293         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3294         break;
3295       case 3:
3296         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3297         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3298         break;
3299       case 4:
3300         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3301         PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3302         break;
3303       default:
3304         B->ops->mult = MatMult_SeqBAIJ_N;
3305         PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3306         break;
3307       }
3308       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3309       break;
3310     }
3311     default:
3312       B->ops->mult    = MatMult_SeqBAIJ_N;
3313       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3314       PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3315       break;
3316     }
3317   }
3318   B->ops->sor = MatSOR_SeqBAIJ;
3319   b->mbs      = mbs;
3320   b->nbs      = nbs;
3321   if (!skipallocation) {
3322     if (!b->imax) {
3323       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
3324 
3325       b->free_imax_ilen = PETSC_TRUE;
3326     }
3327     /* b->ilen will count nonzeros in each block row so far. */
3328     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3329     if (!nnz) {
3330       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3331       else if (nz < 0) nz = 1;
3332       nz = PetscMin(nz, nbs);
3333       for (i = 0; i < mbs; i++) b->imax[i] = nz;
3334       PetscCall(PetscIntMultError(nz, mbs, &nz));
3335     } else {
3336       PetscInt64 nz64 = 0;
3337       for (i = 0; i < mbs; i++) {
3338         b->imax[i] = nnz[i];
3339         nz64 += nnz[i];
3340       }
3341       PetscCall(PetscIntCast(nz64, &nz));
3342     }
3343 
3344     /* allocate the matrix space */
3345     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3346     PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j));
3347     PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i));
3348     if (B->structure_only) {
3349       b->free_a = PETSC_FALSE;
3350     } else {
3351       PetscInt nzbs2 = 0;
3352       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
3353       PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a));
3354       b->free_a = PETSC_TRUE;
3355       PetscCall(PetscArrayzero(b->a, nzbs2));
3356     }
3357     b->free_ij = PETSC_TRUE;
3358     PetscCall(PetscArrayzero(b->j, nz));
3359 
3360     b->i[0] = 0;
3361     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3362   } else {
3363     b->free_a  = PETSC_FALSE;
3364     b->free_ij = PETSC_FALSE;
3365   }
3366 
3367   b->bs2              = bs2;
3368   b->mbs              = mbs;
3369   b->nz               = 0;
3370   b->maxnz            = nz;
3371   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3372   B->was_assembled    = PETSC_FALSE;
3373   B->assembled        = PETSC_FALSE;
3374   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3375   PetscFunctionReturn(PETSC_SUCCESS);
3376 }
3377 
3378 static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3379 {
3380   PetscInt     i, m, nz, nz_max = 0, *nnz;
3381   PetscScalar *values      = NULL;
3382   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3383 
3384   PetscFunctionBegin;
3385   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
3386   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
3387   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
3388   PetscCall(PetscLayoutSetUp(B->rmap));
3389   PetscCall(PetscLayoutSetUp(B->cmap));
3390   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3391   m = B->rmap->n / bs;
3392 
3393   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
3394   PetscCall(PetscMalloc1(m + 1, &nnz));
3395   for (i = 0; i < m; i++) {
3396     nz = ii[i + 1] - ii[i];
3397     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3398     nz_max = PetscMax(nz_max, nz);
3399     nnz[i] = nz;
3400   }
3401   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
3402   PetscCall(PetscFree(nnz));
3403 
3404   values = (PetscScalar *)V;
3405   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3406   for (i = 0; i < m; i++) {
3407     PetscInt        ncols = ii[i + 1] - ii[i];
3408     const PetscInt *icols = jj + ii[i];
3409     if (bs == 1 || !roworiented) {
3410       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
3411       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
3412     } else {
3413       PetscInt j;
3414       for (j = 0; j < ncols; j++) {
3415         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
3416         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
3417       }
3418     }
3419   }
3420   if (!V) PetscCall(PetscFree(values));
3421   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3422   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3423   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3424   PetscFunctionReturn(PETSC_SUCCESS);
3425 }
3426 
3427 /*@C
3428   MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3429 
3430   Not Collective
3431 
3432   Input Parameter:
3433 . A - a `MATSEQBAIJ` matrix
3434 
3435   Output Parameter:
3436 . array - pointer to the data
3437 
3438   Level: intermediate
3439 
3440 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3441 @*/
3442 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[])
3443 {
3444   PetscFunctionBegin;
3445   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
3446   PetscFunctionReturn(PETSC_SUCCESS);
3447 }
3448 
3449 /*@C
3450   MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3451 
3452   Not Collective
3453 
3454   Input Parameters:
3455 + A     - a `MATSEQBAIJ` matrix
3456 - array - pointer to the data
3457 
3458   Level: intermediate
3459 
3460 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3461 @*/
3462 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[])
3463 {
3464   PetscFunctionBegin;
3465   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
3466   PetscFunctionReturn(PETSC_SUCCESS);
3467 }
3468 
3469 /*MC
3470    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3471    block sparse compressed row format.
3472 
3473    Options Database Keys:
3474 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()`
3475 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3476 
3477    Level: beginner
3478 
3479    Notes:
3480    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
3481    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
3482 
3483    Run with `-info` to see what version of the matrix-vector product is being used
3484 
3485 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()`
3486 M*/
3487 
3488 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3489 
3490 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3491 {
3492   PetscMPIInt  size;
3493   Mat_SeqBAIJ *b;
3494 
3495   PetscFunctionBegin;
3496   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
3497   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3498 
3499   PetscCall(PetscNew(&b));
3500   B->data   = (void *)b;
3501   B->ops[0] = MatOps_Values;
3502 
3503   b->row          = NULL;
3504   b->col          = NULL;
3505   b->icol         = NULL;
3506   b->reallocs     = 0;
3507   b->saved_values = NULL;
3508 
3509   b->roworiented        = PETSC_TRUE;
3510   b->nonew              = 0;
3511   b->diag               = NULL;
3512   B->spptr              = NULL;
3513   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3514   b->keepnonzeropattern = PETSC_FALSE;
3515 
3516   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
3517   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
3518   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
3519   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
3520   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
3521   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
3522   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
3523   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
3524   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3525   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
3526 #if defined(PETSC_HAVE_HYPRE)
3527   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
3528 #endif
3529   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
3530   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
3531   PetscFunctionReturn(PETSC_SUCCESS);
3532 }
3533 
3534 PETSC_INTERN PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3535 {
3536   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3537   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3538 
3539   PetscFunctionBegin;
3540   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
3541   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
3542 
3543   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3544     c->imax           = a->imax;
3545     c->ilen           = a->ilen;
3546     c->free_imax_ilen = PETSC_FALSE;
3547   } else {
3548     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3549     for (i = 0; i < mbs; i++) {
3550       c->imax[i] = a->imax[i];
3551       c->ilen[i] = a->ilen[i];
3552     }
3553     c->free_imax_ilen = PETSC_TRUE;
3554   }
3555 
3556   /* allocate the matrix space */
3557   if (mallocmatspace) {
3558     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3559       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3560       PetscCall(PetscArrayzero(c->a, bs2 * nz));
3561       c->free_a       = PETSC_TRUE;
3562       c->i            = a->i;
3563       c->j            = a->j;
3564       c->free_ij      = PETSC_FALSE;
3565       c->parent       = A;
3566       C->preallocated = PETSC_TRUE;
3567       C->assembled    = PETSC_TRUE;
3568 
3569       PetscCall(PetscObjectReference((PetscObject)A));
3570       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3571       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3572     } else {
3573       PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3574       PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j));
3575       PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i));
3576       c->free_a  = PETSC_TRUE;
3577       c->free_ij = PETSC_TRUE;
3578 
3579       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3580       if (mbs > 0) {
3581         PetscCall(PetscArraycpy(c->j, a->j, nz));
3582         if (cpvalues == MAT_COPY_VALUES) {
3583           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
3584         } else {
3585           PetscCall(PetscArrayzero(c->a, bs2 * nz));
3586         }
3587       }
3588       C->preallocated = PETSC_TRUE;
3589       C->assembled    = PETSC_TRUE;
3590     }
3591   }
3592 
3593   c->roworiented = a->roworiented;
3594   c->nonew       = a->nonew;
3595 
3596   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
3597   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
3598 
3599   c->bs2        = a->bs2;
3600   c->mbs        = a->mbs;
3601   c->nbs        = a->nbs;
3602   c->nz         = a->nz;
3603   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3604   c->solve_work = NULL;
3605   c->mult_work  = NULL;
3606   c->sor_workt  = NULL;
3607   c->sor_work   = NULL;
3608 
3609   c->compressedrow.use   = a->compressedrow.use;
3610   c->compressedrow.nrows = a->compressedrow.nrows;
3611   if (a->compressedrow.use) {
3612     i = a->compressedrow.nrows;
3613     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
3614     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
3615     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
3616   } else {
3617     c->compressedrow.use    = PETSC_FALSE;
3618     c->compressedrow.i      = NULL;
3619     c->compressedrow.rindex = NULL;
3620   }
3621   c->nonzerorowcnt = a->nonzerorowcnt;
3622   C->nonzerostate  = A->nonzerostate;
3623 
3624   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
3625   PetscFunctionReturn(PETSC_SUCCESS);
3626 }
3627 
3628 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3629 {
3630   PetscFunctionBegin;
3631   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
3632   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
3633   PetscCall(MatSetType(*B, MATSEQBAIJ));
3634   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
3635   PetscFunctionReturn(PETSC_SUCCESS);
3636 }
3637 
3638 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3639 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3640 {
3641   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3642   PetscInt    *rowidxs, *colidxs;
3643   PetscScalar *matvals;
3644 
3645   PetscFunctionBegin;
3646   PetscCall(PetscViewerSetUp(viewer));
3647 
3648   /* read matrix header */
3649   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3650   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3651   M  = header[1];
3652   N  = header[2];
3653   nz = header[3];
3654   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3655   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3656   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3657 
3658   /* set block sizes from the viewer's .info file */
3659   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3660   /* set local and global sizes if not set already */
3661   if (mat->rmap->n < 0) mat->rmap->n = M;
3662   if (mat->cmap->n < 0) mat->cmap->n = N;
3663   if (mat->rmap->N < 0) mat->rmap->N = M;
3664   if (mat->cmap->N < 0) mat->cmap->N = N;
3665   PetscCall(PetscLayoutSetUp(mat->rmap));
3666   PetscCall(PetscLayoutSetUp(mat->cmap));
3667 
3668   /* check if the matrix sizes are correct */
3669   PetscCall(MatGetSize(mat, &rows, &cols));
3670   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3671   PetscCall(MatGetBlockSize(mat, &bs));
3672   PetscCall(MatGetLocalSize(mat, &m, &n));
3673   mbs = m / bs;
3674   nbs = n / bs;
3675 
3676   /* read in row lengths, column indices and nonzero values */
3677   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3678   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
3679   rowidxs[0] = 0;
3680   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3681   sum = rowidxs[m];
3682   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3683 
3684   /* read in column indices and nonzero values */
3685   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
3686   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
3687   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3688 
3689   {               /* preallocate matrix storage */
3690     PetscBT   bt; /* helper bit set to count nonzeros */
3691     PetscInt *nnz;
3692     PetscBool sbaij;
3693 
3694     PetscCall(PetscBTCreate(nbs, &bt));
3695     PetscCall(PetscCalloc1(mbs, &nnz));
3696     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3697     for (i = 0; i < mbs; i++) {
3698       PetscCall(PetscBTMemzero(nbs, bt));
3699       for (k = 0; k < bs; k++) {
3700         PetscInt row = bs * i + k;
3701         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3702           PetscInt col = colidxs[j];
3703           if (!sbaij || col >= row)
3704             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3705         }
3706       }
3707     }
3708     PetscCall(PetscBTDestroy(&bt));
3709     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
3710     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
3711     PetscCall(PetscFree(nnz));
3712   }
3713 
3714   /* store matrix values */
3715   for (i = 0; i < m; i++) {
3716     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
3717     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3718   }
3719 
3720   PetscCall(PetscFree(rowidxs));
3721   PetscCall(PetscFree2(colidxs, matvals));
3722   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3723   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3724   PetscFunctionReturn(PETSC_SUCCESS);
3725 }
3726 
3727 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3728 {
3729   PetscBool isbinary;
3730 
3731   PetscFunctionBegin;
3732   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3733   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3734   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
3735   PetscFunctionReturn(PETSC_SUCCESS);
3736 }
3737 
3738 /*@
3739   MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3740   compressed row) format.  For good matrix assembly performance the
3741   user should preallocate the matrix storage by setting the parameter `nz`
3742   (or the array `nnz`).
3743 
3744   Collective
3745 
3746   Input Parameters:
3747 + comm - MPI communicator, set to `PETSC_COMM_SELF`
3748 . bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3749          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3750 . m    - number of rows
3751 . n    - number of columns
3752 . nz   - number of nonzero blocks  per block row (same for all rows)
3753 - nnz  - array containing the number of nonzero blocks in the various block rows
3754          (possibly different for each block row) or `NULL`
3755 
3756   Output Parameter:
3757 . A - the matrix
3758 
3759   Options Database Keys:
3760 + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3761 - -mat_block_size - size of the blocks to use
3762 
3763   Level: intermediate
3764 
3765   Notes:
3766   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3767   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3768   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
3769 
3770   The number of rows and columns must be divisible by blocksize.
3771 
3772   If the `nnz` parameter is given then the `nz` parameter is ignored
3773 
3774   A nonzero block is any block that as 1 or more nonzeros in it
3775 
3776   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3777   storage.  That is, the stored row and column indices can begin at
3778   either one (as in Fortran) or zero.
3779 
3780   Specify the preallocated storage with either `nz` or `nnz` (not both).
3781   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3782   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3783   matrices.
3784 
3785 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3786 @*/
3787 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3788 {
3789   PetscFunctionBegin;
3790   PetscCall(MatCreate(comm, A));
3791   PetscCall(MatSetSizes(*A, m, n, m, n));
3792   PetscCall(MatSetType(*A, MATSEQBAIJ));
3793   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
3794   PetscFunctionReturn(PETSC_SUCCESS);
3795 }
3796 
3797 /*@
3798   MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3799   per row in the matrix. For good matrix assembly performance the
3800   user should preallocate the matrix storage by setting the parameter `nz`
3801   (or the array `nnz`).
3802 
3803   Collective
3804 
3805   Input Parameters:
3806 + B   - the matrix
3807 . bs  - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3808         blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3809 . nz  - number of block nonzeros per block row (same for all rows)
3810 - nnz - array containing the number of block nonzeros in the various block rows
3811         (possibly different for each block row) or `NULL`
3812 
3813   Options Database Keys:
3814 + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
3815 - -mat_block_size - size of the blocks to use
3816 
3817   Level: intermediate
3818 
3819   Notes:
3820   If the `nnz` parameter is given then the `nz` parameter is ignored
3821 
3822   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3823   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3824   You can also run with the option `-info` and look for messages with the string
3825   malloc in them to see if additional memory allocation was needed.
3826 
3827   The `MATSEQBAIJ` format is fully compatible with standard Fortran
3828   storage.  That is, the stored row and column indices can begin at
3829   either one (as in Fortran) or zero.
3830 
3831   Specify the preallocated storage with either `nz` or `nnz` (not both).
3832   Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3833   allocation.  See [Sparse Matrices](sec_matsparse) for details.
3834 
3835 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3836 @*/
3837 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3838 {
3839   PetscFunctionBegin;
3840   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3841   PetscValidType(B, 1);
3842   PetscValidLogicalCollectiveInt(B, bs, 2);
3843   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
3844   PetscFunctionReturn(PETSC_SUCCESS);
3845 }
3846 
3847 /*@C
3848   MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3849 
3850   Collective
3851 
3852   Input Parameters:
3853 + B  - the matrix
3854 . bs - the blocksize
3855 . i  - the indices into `j` for the start of each local row (indices start with zero)
3856 . j  - the column indices for each local row (indices start with zero) these must be sorted for each row
3857 - v  - optional values in the matrix, use `NULL` if not provided
3858 
3859   Level: advanced
3860 
3861   Notes:
3862   The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()`
3863 
3864   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
3865   may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
3866   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3867   `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3868   block column and the second index is over columns within a block.
3869 
3870   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3871 
3872 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3873 @*/
3874 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3875 {
3876   PetscFunctionBegin;
3877   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3878   PetscValidType(B, 1);
3879   PetscValidLogicalCollectiveInt(B, bs, 2);
3880   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
3881   PetscFunctionReturn(PETSC_SUCCESS);
3882 }
3883 
3884 /*@
3885   MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3886 
3887   Collective
3888 
3889   Input Parameters:
3890 + comm - must be an MPI communicator of size 1
3891 . bs   - size of block
3892 . m    - number of rows
3893 . n    - number of columns
3894 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3895 . j    - column indices
3896 - a    - matrix values
3897 
3898   Output Parameter:
3899 . mat - the matrix
3900 
3901   Level: advanced
3902 
3903   Notes:
3904   The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
3905   once the matrix is destroyed
3906 
3907   You cannot set new nonzero locations into this matrix, that will generate an error.
3908 
3909   The `i` and `j` indices are 0 based
3910 
3911   When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3912 
3913   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3914   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3915   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3916   with column-major ordering within blocks.
3917 
3918 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3919 @*/
3920 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3921 {
3922   Mat_SeqBAIJ *baij;
3923 
3924   PetscFunctionBegin;
3925   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
3926   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3927 
3928   PetscCall(MatCreate(comm, mat));
3929   PetscCall(MatSetSizes(*mat, m, n, m, n));
3930   PetscCall(MatSetType(*mat, MATSEQBAIJ));
3931   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3932   baij = (Mat_SeqBAIJ *)(*mat)->data;
3933   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3934 
3935   baij->i = i;
3936   baij->j = j;
3937   baij->a = a;
3938 
3939   baij->nonew          = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3940   baij->free_a         = PETSC_FALSE;
3941   baij->free_ij        = PETSC_FALSE;
3942   baij->free_imax_ilen = PETSC_TRUE;
3943 
3944   for (PetscInt ii = 0; ii < m; ii++) {
3945     const PetscInt row_len = i[ii + 1] - i[ii];
3946 
3947     baij->ilen[ii] = baij->imax[ii] = row_len;
3948     PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
3949   }
3950   if (PetscDefined(USE_DEBUG)) {
3951     for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
3952       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3953       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3954     }
3955   }
3956 
3957   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3958   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3959   PetscFunctionReturn(PETSC_SUCCESS);
3960 }
3961 
3962 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3963 {
3964   PetscFunctionBegin;
3965   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
3966   PetscFunctionReturn(PETSC_SUCCESS);
3967 }
3968