xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision bcee047adeeb73090d7e36cc71e39fc287cdbb97)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscblaslapack.h>
5 
/* MatDestroy_MPISBAIJ - releases everything an MPISBAIJ matrix owns: the two
   assembly stashes, the sequential diagonal (A) and off-diagonal (B) blocks,
   the column map, the gathered-column array, all scatter contexts and work
   vectors, and finally the Mat_MPISBAIJ context itself; it then clears the
   type name and detaches the composed method callbacks. */
6 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
7 {
8   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
9 
10   PetscFunctionBegin;
11 #if defined(PETSC_USE_LOG)
12   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
13 #endif
14   PetscCall(MatStashDestroy_Private(&mat->stash));
15   PetscCall(MatStashDestroy_Private(&mat->bstash));
16   PetscCall(MatDestroy(&baij->A)); /* diagonal-block (SeqSBAIJ) part */
17   PetscCall(MatDestroy(&baij->B)); /* off-diagonal-block (SeqBAIJ) part */
18 #if defined(PETSC_USE_CTABLE)
19   PetscCall(PetscHMapIDestroy(&baij->colmap)); /* colmap is a hash map in CTABLE builds */
20 #else
21   PetscCall(PetscFree(baij->colmap)); /* ... and a plain array otherwise */
22 #endif
23   PetscCall(PetscFree(baij->garray));
24   PetscCall(VecDestroy(&baij->lvec));
25   PetscCall(VecScatterDestroy(&baij->Mvctx));
26   PetscCall(VecDestroy(&baij->slvec0));
27   PetscCall(VecDestroy(&baij->slvec0b));
28   PetscCall(VecDestroy(&baij->slvec1));
29   PetscCall(VecDestroy(&baij->slvec1a));
30   PetscCall(VecDestroy(&baij->slvec1b));
31   PetscCall(VecScatterDestroy(&baij->sMvctx));
32   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
33   PetscCall(PetscFree(baij->barray));
34   PetscCall(PetscFree(baij->hd));
35   PetscCall(VecDestroy(&baij->diag));
36   PetscCall(VecDestroy(&baij->bb1));
37   PetscCall(VecDestroy(&baij->xx1));
38 #if defined(PETSC_USE_REAL_MAT_SINGLE)
39   PetscCall(PetscFree(baij->setvaluescopy));
40 #endif
41   PetscCall(PetscFree(baij->in_loc)); /* scratch used by MatSetValues_MPISBAIJ for stashing */
42   PetscCall(PetscFree(baij->v_loc));
43   PetscCall(PetscFree(baij->rangebs));
44   PetscCall(PetscFree(mat->data)); /* the Mat_MPISBAIJ context itself; baij is dangling after this */
45 
46   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
47   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
48   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
50   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
51 #if defined(PETSC_HAVE_ELEMENTAL)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
53 #endif
54 #if defined(PETSC_HAVE_SCALAPACK)
55   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
56 #endif
57   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
58   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
59   PetscFunctionReturn(PETSC_SUCCESS);
60 }
61 
62 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
63 #define TYPE SBAIJ
64 #define TYPE_SBAIJ
65 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
66 #undef TYPE
67 #undef TYPE_SBAIJ
68 
69 #if defined(PETSC_HAVE_ELEMENTAL)
70 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
71 #endif
72 #if defined(PETSC_HAVE_SCALAPACK)
73 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
74 #endif
75 
76 /* This could be moved to matimpl.h */
/* MatPreallocateWithMats_Private - preallocates B using the union of the
   nonzero patterns of the nm matrices X[] (which must share B's row layout).
   A MATPREALLOCATOR matrix records every (row,col) that would be inserted;
   when symm[i] is true the transposed entries of X[i] are recorded as well,
   so a symmetric-storage source yields a full (both-triangles) pattern.
   If fill is true the preallocator also copies the numerical values into B. */
77 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
78 {
79   Mat       preallocator;
80   PetscInt  r, rstart, rend;
81   PetscInt  bs, i, m, n, M, N;
82   PetscBool cong = PETSC_TRUE;
83 
84   PetscFunctionBegin;
85   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
86   PetscValidLogicalCollectiveInt(B, nm, 2);
87   for (i = 0; i < nm; i++) {
88     PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
89     PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
90     PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
91   }
92   PetscValidLogicalCollectiveBool(B, fill, 5);
93   PetscCall(MatGetBlockSize(B, &bs));
94   PetscCall(MatGetSize(B, &M, &N));
95   PetscCall(MatGetLocalSize(B, &m, &n));
96   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
97   PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
98   PetscCall(MatSetBlockSize(preallocator, bs));
99   PetscCall(MatSetSizes(preallocator, m, n, M, N));
100   PetscCall(MatSetUp(preallocator));
101   PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
102   for (r = rstart; r < rend; ++r) {
103     PetscInt           ncols;
104     const PetscInt    *row;
105     const PetscScalar *vals;
106 
107     for (i = 0; i < nm; i++) {
108       PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
109       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
110       if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES)); /* mirror the row to get the transposed entries too */
111       PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
112     }
113   }
114   PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
115   PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
116   PetscCall(MatPreallocatorPreallocate(preallocator, fill, B)); /* transfers the recorded pattern (and values if fill) into B */
117   PetscCall(MatDestroy(&preallocator));
118   PetscFunctionReturn(PETSC_SUCCESS);
119 }
120 
/* MatConvert_MPISBAIJ_Basic - generic conversion from MPISBAIJ (symmetric,
   upper-triangle storage) to any type that supports MatSetValues. Each stored
   upper-triangular entry is inserted at (r,c) and mirrored to (c,r) so the
   result holds the full matrix; for complex Hermitian matrices the mirrored
   value is conjugated. Dense targets skip preallocation (MatSetUp suffices);
   sparse targets are preallocated via MatPreallocateWithMats_Private with
   symm = PETSC_TRUE so the mirrored entries fit. */
121 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
122 {
123   Mat      B;
124   PetscInt r;
125 
126   PetscFunctionBegin;
127   if (reuse != MAT_REUSE_MATRIX) {
128     PetscBool symm = PETSC_TRUE, isdense;
129     PetscInt  bs;
130 
131     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
132     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
133     PetscCall(MatSetType(B, newtype));
134     PetscCall(MatGetBlockSize(A, &bs));
135     PetscCall(MatSetBlockSize(B, bs));
136     PetscCall(PetscLayoutSetUp(B->rmap));
137     PetscCall(PetscLayoutSetUp(B->cmap));
138     PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
139     if (!isdense) {
140       /* MatGetRow on sbaij only returns the stored upper triangle; the bracketing
         Get/RestoreRowUpperTriangular calls make that access legal */
140       PetscCall(MatGetRowUpperTriangular(A));
141       PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
142       PetscCall(MatRestoreRowUpperTriangular(A));
143     } else {
144       PetscCall(MatSetUp(B));
145     }
146   } else {
147     B = *newmat;
148     PetscCall(MatZeroEntries(B));
149   }
150 
151   PetscCall(MatGetRowUpperTriangular(A));
152   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
153     PetscInt           ncols;
154     const PetscInt    *row;
155     const PetscScalar *vals;
156 
157     PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
158     PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
159 #if defined(PETSC_USE_COMPLEX)
160     if (A->hermitian == PETSC_BOOL3_TRUE) {
161       PetscInt i;
162       for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES)); /* mirror with conjugation for Hermitian matrices */
163     } else {
164       PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
165     }
166 #else
167     PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); /* mirror entries into the lower triangle */
168 #endif
169     PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
170   }
171   PetscCall(MatRestoreRowUpperTriangular(A));
172   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
173   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
174 
175   if (reuse == MAT_INPLACE_MATRIX) {
176     PetscCall(MatHeaderReplace(A, &B)); /* A takes over B's guts; B handle is consumed */
177   } else {
178     *newmat = B;
179   }
180   PetscFunctionReturn(PETSC_SUCCESS);
181 }
182 
/* MatStoreValues_MPISBAIJ - snapshots the numerical values of both the
   diagonal (A) and off-diagonal (B) sequential blocks so they can later be
   restored with MatRetrieveValues_MPISBAIJ. */
183 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
184 {
185   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
186 
187   PetscFunctionBegin;
188   PetscCall(MatStoreValues(aij->A));
189   PetscCall(MatStoreValues(aij->B));
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
/* MatRetrieveValues_MPISBAIJ - restores the values previously saved by
   MatStoreValues_MPISBAIJ into both sequential blocks. */
193 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
194 {
195   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
196 
197   PetscFunctionBegin;
198   PetscCall(MatRetrieveValues(aij->A));
199   PetscCall(MatRetrieveValues(aij->B));
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
/* MatSetValues_SeqSBAIJ_A_Private - scalar insert/add of `value` at local
   (row,col) into the diagonal block A (SeqSBAIJ). Uses the captured locals of
   MatSetValues_MPISBAIJ (a, aa, ai, aj, aimax, ailen, bs, bs2, ...). A short
   binary search narrows the row's block-column list, then a linear scan
   finishes; if the block column is absent and a->nonew permits, the row is
   reallocated (MatSeqXAIJReallocateAIJ) and later entries are shifted up to
   make room. orow/ocol are the original global indices, used only in the
   error message. */
203 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
204   { \
205     brow = row / bs; \
206     rp   = aj + ai[brow]; \
207     ap   = aa + bs2 * ai[brow]; \
208     rmax = aimax[brow]; \
209     nrow = ailen[brow]; \
210     bcol = col / bs; \
211     ridx = row % bs; \
212     cidx = col % bs; \
213     low  = 0; \
214     high = nrow; \
215     while (high - low > 3) { /* binary search until the window is small */ \
216       t = (low + high) / 2; \
217       if (rp[t] > bcol) high = t; \
218       else low = t; \
219     } \
220     for (_i = low; _i < high; _i++) { \
221       if (rp[_i] > bcol) break; \
222       if (rp[_i] == bcol) { /* block already present: update one scalar inside it */ \
223         bap = ap + bs2 * _i + bs * cidx + ridx; \
224         if (addv == ADD_VALUES) *bap += value; \
225         else *bap = value; \
226         goto a_noinsert; \
227       } \
228     } \
229     if (a->nonew == 1) goto a_noinsert; /* new nonzeros silently ignored */ \
230     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
231     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
232     N = nrow++ - 1; \
233     /* shift up all the later entries in this row */ \
234     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
235     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
236     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
237     rp[_i]                          = bcol; \
238     ap[bs2 * _i + bs * cidx + ridx] = value; \
239     A->nonzerostate++; \
240   a_noinsert:; \
241     ailen[brow] = nrow; \
242   }
243 
/* MatSetValues_SeqSBAIJ_B_Private - same algorithm as the A-variant above,
   but targets the off-diagonal block B (SeqBAIJ) via the b/ba/bi/bj/bimax/
   bilen locals captured from MatSetValues_MPISBAIJ. */
244 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
245   { \
246     brow = row / bs; \
247     rp   = bj + bi[brow]; \
248     ap   = ba + bs2 * bi[brow]; \
249     rmax = bimax[brow]; \
250     nrow = bilen[brow]; \
251     bcol = col / bs; \
252     ridx = row % bs; \
253     cidx = col % bs; \
254     low  = 0; \
255     high = nrow; \
256     while (high - low > 3) { /* binary search until the window is small */ \
257       t = (low + high) / 2; \
258       if (rp[t] > bcol) high = t; \
259       else low = t; \
260     } \
261     for (_i = low; _i < high; _i++) { \
262       if (rp[_i] > bcol) break; \
263       if (rp[_i] == bcol) { /* block already present: update one scalar inside it */ \
264         bap = ap + bs2 * _i + bs * cidx + ridx; \
265         if (addv == ADD_VALUES) *bap += value; \
266         else *bap = value; \
267         goto b_noinsert; \
268       } \
269     } \
270     if (b->nonew == 1) goto b_noinsert; /* new nonzeros silently ignored */ \
271     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
272     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
273     N = nrow++ - 1; \
274     /* shift up all the later entries in this row */ \
275     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
276     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
277     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
278     rp[_i]                          = bcol; \
279     ap[bs2 * _i + bs * cidx + ridx] = value; \
280     B->nonzerostate++; \
281   b_noinsert:; \
282     bilen[brow] = nrow; \
283   }
284 
285 /* Only add/insert a(i,j) with i<=j (blocks).
286    Any a(i,j) with i>j input by user is ignored or generates an error
287 */
/* MatSetValues_MPISBAIJ - scalar-indexed insertion for the parallel SBAIJ
   format. Locally-owned rows are routed to the diagonal block A (upper
   triangle only) or the off-diagonal block B via the two macros above;
   off-process rows are copied into mat->stash (lower-triangular entries
   filtered out first) for delivery during assembly. v is read row-oriented
   (v[i*n+j]) or column-oriented (v[i+j*m]) per baij->roworiented. */
288 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
289 {
290   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
291   MatScalar     value;
292   PetscBool     roworiented = baij->roworiented;
293   PetscInt      i, j, row, col;
294   PetscInt      rstart_orig = mat->rmap->rstart;
295   PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
296   PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
297 
298   /* Some Variables required in the macro */
299   Mat           A     = baij->A;
300   Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
301   PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
302   MatScalar    *aa = a->a;
303 
304   Mat          B     = baij->B;
305   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
306   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
307   MatScalar   *ba = b->a;
308 
309   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
310   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
311   MatScalar *ap, *bap;
312 
313   /* for stash */
314   PetscInt   n_loc, *in_loc = NULL;
315   MatScalar *v_loc = NULL;
316 
317   PetscFunctionBegin;
318   if (!baij->donotstash) {
319     /* grow the persistent scratch buffers used to filter off-process rows before stashing */
319     if (n > baij->n_loc) {
320       PetscCall(PetscFree(baij->in_loc));
321       PetscCall(PetscFree(baij->v_loc));
322       PetscCall(PetscMalloc1(n, &baij->in_loc));
323       PetscCall(PetscMalloc1(n, &baij->v_loc));
324 
325       baij->n_loc = n;
326     }
327     in_loc = baij->in_loc;
328     v_loc  = baij->v_loc;
329   }
330 
331   for (i = 0; i < m; i++) {
332     if (im[i] < 0) continue; /* negative rows are silently skipped */
333     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
334     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
335       row = im[i] - rstart_orig;                     /* local row index */
336       for (j = 0; j < n; j++) {
337         if (im[i] / bs > in[j] / bs) { /* entry lies in a strictly-lower block */
338           if (a->ignore_ltriangular) {
339             continue; /* ignore lower triangular blocks */
340           } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
341         }
342         if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
343           col  = in[j] - cstart_orig;                    /* local col index */
344           brow = row / bs;
345           bcol = col / bs;
346           if (brow > bcol) continue; /* ignore lower triangular blocks of A */
347           if (roworiented) value = v[i * n + j];
348           else value = v[i + j * m];
349           MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
350           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
351         } else if (in[j] < 0) {
352           continue;
353         } else {
354           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
355           /* off-diag entry (B) */
356           if (mat->was_assembled) {
357             /* after assembly B is compacted; map the global block column through colmap */
357             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
358 #if defined(PETSC_USE_CTABLE)
359             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
360             col = col - 1;
361 #else
362             col = baij->colmap[in[j] / bs] - 1;
363 #endif
364             if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) { /* unknown block column: expand B back to its pre-assembly form */
365               PetscCall(MatDisAssemble_MPISBAIJ(mat));
366               col = in[j];
367               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
368               B     = baij->B;
369               b     = (Mat_SeqBAIJ *)(B)->data;
370               bimax = b->imax;
371               bi    = b->i;
372               bilen = b->ilen;
373               bj    = b->j;
374               ba    = b->a;
375             } else col += in[j] % bs;
376           } else col = in[j];
377           if (roworiented) value = v[i * n + j];
378           else value = v[i + j * m];
379           MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
380           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
381         }
382       }
383     } else { /* off processor entry */
384       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
385       if (!baij->donotstash) {
386         mat->assembled = PETSC_FALSE;
387         n_loc          = 0;
388         for (j = 0; j < n; j++) {
389           if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
390           in_loc[n_loc] = in[j];
391           if (roworiented) {
392             v_loc[n_loc] = v[i * n + j];
393           } else {
394             v_loc[n_loc] = v[j * m + i];
395           }
396           n_loc++;
397         }
398         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
399       }
400     }
401   }
402   PetscFunctionReturn(PETSC_SUCCESS);
403 }
404 
405 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
406 {
407   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
408   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
409   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
410   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
411   PetscBool          roworiented = a->roworiented;
412   const PetscScalar *value       = v;
413   MatScalar         *ap, *aa = a->a, *bap;
414 
415   PetscFunctionBegin;
416   if (col < row) {
417     if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
418     else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
419   }
420   rp    = aj + ai[row];
421   ap    = aa + bs2 * ai[row];
422   rmax  = imax[row];
423   nrow  = ailen[row];
424   value = v;
425   low   = 0;
426   high  = nrow;
427 
428   while (high - low > 7) {
429     t = (low + high) / 2;
430     if (rp[t] > col) high = t;
431     else low = t;
432   }
433   for (i = low; i < high; i++) {
434     if (rp[i] > col) break;
435     if (rp[i] == col) {
436       bap = ap + bs2 * i;
437       if (roworiented) {
438         if (is == ADD_VALUES) {
439           for (ii = 0; ii < bs; ii++) {
440             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
441           }
442         } else {
443           for (ii = 0; ii < bs; ii++) {
444             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
445           }
446         }
447       } else {
448         if (is == ADD_VALUES) {
449           for (ii = 0; ii < bs; ii++) {
450             for (jj = 0; jj < bs; jj++) *bap++ += *value++;
451           }
452         } else {
453           for (ii = 0; ii < bs; ii++) {
454             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
455           }
456         }
457       }
458       goto noinsert2;
459     }
460   }
461   if (nonew == 1) goto noinsert2;
462   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
463   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
464   N = nrow++ - 1;
465   high++;
466   /* shift up all the later entries in this row */
467   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
468   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
469   rp[i] = col;
470   bap   = ap + bs2 * i;
471   if (roworiented) {
472     for (ii = 0; ii < bs; ii++) {
473       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
474     }
475   } else {
476     for (ii = 0; ii < bs; ii++) {
477       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
478     }
479   }
480 noinsert2:;
481   ailen[row] = nrow;
482   PetscFunctionReturn(PETSC_SUCCESS);
483 }
484 
485 /*
486    This routine is exactly duplicated in mpibaij.c
487 */
/* MatSetValuesBlocked_SeqBAIJ_Inlined - inserts/adds one dense bs-by-bs block
   v at block position (row,col) of a SeqBAIJ matrix (no triangular filtering,
   unlike the SBAIJ variant above). Same search/realloc/shift structure. */
488 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
489 {
490   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
491   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
492   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
493   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
494   PetscBool          roworiented = a->roworiented;
495   const PetscScalar *value       = v;
496   MatScalar         *ap, *aa = a->a, *bap;
497 
498   PetscFunctionBegin;
499   rp    = aj + ai[row];       /* block-column indices of this block row */
500   ap    = aa + bs2 * ai[row]; /* values of this block row */
501   rmax  = imax[row];
502   nrow  = ailen[row];
503   low   = 0;
504   high  = nrow;
505   value = v;
506   while (high - low > 7) { /* binary search until the window is small */
507     t = (low + high) / 2;
508     if (rp[t] > col) high = t;
509     else low = t;
510   }
511   for (i = low; i < high; i++) {
512     if (rp[i] > col) break;
513     if (rp[i] == col) { /* block exists: update it in place */
514       bap = ap + bs2 * i;
515       if (roworiented) {
516         /* v is row-major; write it transposed into the column-major block */
516         if (is == ADD_VALUES) {
517           for (ii = 0; ii < bs; ii++) {
518             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
519           }
520         } else {
521           for (ii = 0; ii < bs; ii++) {
522             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
523           }
524         }
525       } else {
526         if (is == ADD_VALUES) {
527           for (ii = 0; ii < bs; ii++, value += bs) {
528             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
529             bap += bs;
530           }
531         } else {
532           for (ii = 0; ii < bs; ii++, value += bs) {
533             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
534             bap += bs;
535           }
536         }
537       }
538       goto noinsert2;
539     }
540   }
541   if (nonew == 1) goto noinsert2; /* new nonzero blocks silently ignored */
542   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
543   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
544   N = nrow++ - 1;
545   high++;
546   /* shift up all the later entries in this row */
547   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
548   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
549   rp[i] = col;
550   bap   = ap + bs2 * i;
551   if (roworiented) {
552     for (ii = 0; ii < bs; ii++) {
553       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
554     }
555   } else {
556     for (ii = 0; ii < bs; ii++) {
557       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
558     }
559   }
560 noinsert2:;
561   ailen[row] = nrow;
562   PetscFunctionReturn(PETSC_SUCCESS);
563 }
564 
565 /*
566     This routine could be optimized by removing the need for the block copy below and passing stride information
567   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
568 */
/* MatSetValuesBlocked_MPISBAIJ - block-indexed insertion for the parallel
   SBAIJ format. im/in are block indices; v holds m*n dense bs-by-bs blocks.
   Each block for a locally-owned block row goes to A (diagonal part, upper
   triangle only) or B (off-diagonal part); blocks for off-process rows are
   stashed into mat->bstash. When a row/column copy cannot be avoided the
   block is first gathered contiguously into the reusable baij->barray. */
569 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
570 {
571   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
572   const MatScalar *value;
573   MatScalar       *barray      = baij->barray;
574   PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
575   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
576   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
577   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
578 
579   PetscFunctionBegin;
580   if (!barray) {
581     /* lazily allocate the persistent one-block scratch buffer */
581     PetscCall(PetscMalloc1(bs2, &barray));
582     baij->barray = barray;
583   }
584 
585   if (roworiented) {
586     stepval = (n - 1) * bs; /* distance between consecutive rows of one block inside v */
587   } else {
588     stepval = (m - 1) * bs;
589   }
590   for (i = 0; i < m; i++) {
591     if (im[i] < 0) continue; /* negative block rows are silently skipped */
592     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
593     if (im[i] >= rstart && im[i] < rend) {
594       row = im[i] - rstart;
595       for (j = 0; j < n; j++) {
596         if (im[i] > in[j]) { /* strictly-lower block */
597           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
598           else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
599         }
600         /* If NumCol = 1 then a copy is not required */
601         if ((roworiented) && (n == 1)) {
602           barray = (MatScalar *)v + i * bs2; /* block already contiguous in v */
603         } else if ((!roworiented) && (m == 1)) {
604           barray = (MatScalar *)v + j * bs2;
605         } else { /* Here a copy is required */
606           if (roworiented) {
607             value = v + i * (stepval + bs) * bs + j * bs;
608           } else {
609             value = v + j * (stepval + bs) * bs + i * bs;
610           }
611           for (ii = 0; ii < bs; ii++, value += stepval) {
612             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
613           }
614           barray -= bs2; /* rewind to the start of the copied block */
615         }
616 
617         if (in[j] >= cstart && in[j] < cend) {
618           col = in[j] - cstart;
619           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
620         } else if (in[j] < 0) {
621           continue;
622         } else {
623           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
624           if (mat->was_assembled) {
625             /* after assembly B is compacted; map the global block column through colmap */
625             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
626 
627 #if defined(PETSC_USE_DEBUG)
628   #if defined(PETSC_USE_CTABLE)
629             {
630               PetscInt data;
631               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
632               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
633             }
634   #else
635             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
636   #endif
637 #endif
638 #if defined(PETSC_USE_CTABLE)
639             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
640             col = (col - 1) / bs;
641 #else
642             col = (baij->colmap[in[j]] - 1) / bs;
643 #endif
644             /* NOTE(review): baij->A holds SeqSBAIJ data, yet it is cast to
               Mat_SeqBAIJ here to read 'nonew' (the scalar path at
               MatSetValues_MPISBAIJ casts to Mat_SeqSBAIJ). This relies on
               the two structs agreeing on the offset of 'nonew' — confirm. */
644             if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
645               PetscCall(MatDisAssemble_MPISBAIJ(mat));
646               col = in[j];
647             }
648           } else col = in[j];
649           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
650         }
651       }
652     } else {
653       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
654       if (!baij->donotstash) {
655         if (roworiented) {
656           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
657         } else {
658           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
659         }
660       }
661     }
662   }
663   PetscFunctionReturn(PETSC_SUCCESS);
664 }
665 
/* MatGetValues_MPISBAIJ - fetches entries (idxm[i], idxn[j]) into v[i*n+j].
   Only locally-owned rows are supported (off-process rows raise an error).
   Columns in the diagonal range come from A; other columns are mapped through
   colmap into B, and entries absent from B read as 0.0. */
666 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
667 {
668   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
669   PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
670   PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
671 
672   PetscFunctionBegin;
673   for (i = 0; i < m; i++) {
674     if (idxm[i] < 0) continue; /* negative row */
675     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
676     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
677       row = idxm[i] - bsrstart;
678       for (j = 0; j < n; j++) {
679         if (idxn[j] < 0) continue; /* negative column */
680         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
681         if (idxn[j] >= bscstart && idxn[j] < bscend) {
682           col = idxn[j] - bscstart;
683           PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
684         } else {
685           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
686 #if defined(PETSC_USE_CTABLE)
687           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
688           data--; /* colmap stores index+1 so 0 means "absent" */
689 #else
690           data = baij->colmap[idxn[j] / bs] - 1;
691 #endif
692           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; /* block column not present in B */
693           else {
694             col = data + idxn[j] % bs;
695             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
696           }
697         }
698       }
699     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
700   }
701   PetscFunctionReturn(PETSC_SUCCESS);
702 }
703 
/* MatNorm_MPISBAIJ - computes a matrix norm of the parallel SBAIJ matrix.
   Frobenius: ||A_diag||^2 + 2*||B_offdiag||^2 summed over ranks (the factor 2
   accounts for the unstored symmetric counterparts of off-diagonal entries).
   1-norm / infinity-norm: identical for a symmetric matrix, so both are
   computed as the maximum column sum, with each non-diagonal stored entry
   also credited to its mirrored row. Other norm types are unsupported. */
704 PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
705 {
706   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
707   PetscReal     sum[2], *lnorm2;
708 
709   PetscFunctionBegin;
710   if (baij->size == 1) {
711     PetscCall(MatNorm(baij->A, type, norm)); /* single rank: everything lives in A */
712   } else {
713     if (type == NORM_FROBENIUS) {
714       PetscCall(PetscMalloc1(2, &lnorm2));
715       PetscCall(MatNorm(baij->A, type, lnorm2));
716       *lnorm2 = (*lnorm2) * (*lnorm2);
717       lnorm2++; /* lnorm2[0] now holds the square of norm(A); advance to the slot for B */
718       PetscCall(MatNorm(baij->B, type, lnorm2));
719       *lnorm2 = (*lnorm2) * (*lnorm2);
720       lnorm2--; /* lnorm2[1] now holds the square of norm(B); rewind for the reduction */
721       PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
722       *norm = PetscSqrtReal(sum[0] + 2 * sum[1]); /* B entries count twice: their transposes are not stored */
723       PetscCall(PetscFree(lnorm2));
724     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
725       Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
726       Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
727       PetscReal    *rsum, *rsum2, vabs;
728       PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
729       PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
730       MatScalar    *v;
731 
732       PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
733       PetscCall(PetscArrayzero(rsum, mat->cmap->N));
734       /* Amat */
735       v  = amat->a;
736       jj = amat->j;
737       for (brow = 0; brow < mbs; brow++) {
738         grow = bs * (rstart + brow); /* global row of this block row's first scalar row */
739         nz   = amat->i[brow + 1] - amat->i[brow];
740         for (bcol = 0; bcol < nz; bcol++) {
741           gcol = bs * (rstart + *jj);
742           jj++;
743           for (col = 0; col < bs; col++) {
744             for (row = 0; row < bs; row++) {
745               vabs = PetscAbsScalar(*v);
746               v++;
747               rsum[gcol + col] += vabs;
748               /* non-diagonal block */
749               if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs; /* credit the unstored mirrored entry */
750             }
751           }
752         }
753         PetscCall(PetscLogFlops(nz * bs * bs));
754       }
755       /* Bmat */
756       v  = bmat->a;
757       jj = bmat->j;
758       for (brow = 0; brow < mbs; brow++) {
759         grow = bs * (rstart + brow);
760         nz   = bmat->i[brow + 1] - bmat->i[brow];
761         for (bcol = 0; bcol < nz; bcol++) {
762           gcol = bs * garray[*jj]; /* garray maps B's compacted block columns to global */
763           jj++;
764           for (col = 0; col < bs; col++) {
765             for (row = 0; row < bs; row++) {
766               vabs = PetscAbsScalar(*v);
767               v++;
768               rsum[gcol + col] += vabs;
769               rsum[grow + row] += vabs; /* off-diagonal part: mirrored entry always unstored */
770             }
771           }
772         }
773         PetscCall(PetscLogFlops(nz * bs * bs));
774       }
775       PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
776       *norm = 0.0;
777       for (col = 0; col < mat->cmap->N; col++) {
778         if (rsum2[col] > *norm) *norm = rsum2[col];
779       }
780       PetscCall(PetscFree2(rsum, rsum2));
781     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
782   }
783   PetscFunctionReturn(PETSC_SUCCESS);
784 }
785 
786 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
787 {
788   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
789   PetscInt      nstash, reallocs;
790 
791   PetscFunctionBegin;
792   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
793 
794   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
795   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
796   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
797   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
798   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
799   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
800   PetscFunctionReturn(PETSC_SUCCESS);
801 }
802 
/*
  MatAssemblyEnd_MPISBAIJ - completes assembly: drains the scalar and block
  stashes of off-process entries, assembles the diagonal (A) and off-diagonal
  (B) sequential blocks, and on the first final assembly builds the scatter
  contexts used by MatMult.
*/
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
  PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt     *row, *col;
  PetscBool     other_disassembled;
  PetscMPIInt   n;
  PetscBool     r1, r2, r3;
  MatScalar    *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the scalar stash: each received message is a flat list of
       (row, col, val) entries destined for this process */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* each stashed block entry occupies bs2 scalars, hence val + i*bs2 */
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    /* restore the orientation flags saved above */
    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    /* LAND of was_assembled: result is FALSE iff at least one rank disassembled */
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

  /* scratch space for MatGetRow() is invalidated by assembly */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
897 
898 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
899 #include <petscdraw.h>
/*
  MatView_MPISBAIJ_ASCIIorDraworSocket - viewer backend for ASCII, draw and
  socket viewers.  Info-style ASCII formats are handled per rank and return
  early; otherwise the entire matrix is gathered onto rank 0 and viewed there.
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank statistics: local sizes, nonzero counts and memory use */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to report for an unfactored matrix */
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat           A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      /* rank 0 owns all rows of the gathered copy; other ranks own none */
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    /* rvals holds the bs consecutive global row indices of one block row */
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          /* insert one bs-long column of the block at a time */
          PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Bloc->i;
    aj   = Bloc->j;
    a    = Bloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        /* garray maps local B block columns to global block columns */
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      /* on rank 0 the gathered matrix lives entirely in its diagonal block */
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1021 
1022 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1023 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
1024 
1025 PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
1026 {
1027   PetscBool iascii, isdraw, issocket, isbinary;
1028 
1029   PetscFunctionBegin;
1030   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1031   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1032   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1033   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1034   if (iascii || isdraw || issocket) {
1035     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
1036   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
1037   PetscFunctionReturn(PETSC_SUCCESS);
1038 }
1039 
1040 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
1041 {
1042   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1043   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1044   PetscScalar       *from;
1045   const PetscScalar *x;
1046 
1047   PetscFunctionBegin;
1048   /* diagonal part */
1049   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1050   /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */
1051   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1052   PetscCall(VecZeroEntries(a->slvec1b));
1053 
1054   /* subdiagonal part */
1055   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1056   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1057 
1058   /* copy x into the vec slvec0 */
1059   PetscCall(VecGetArray(a->slvec0, &from));
1060   PetscCall(VecGetArrayRead(xx, &x));
1061 
1062   PetscCall(PetscArraycpy(from, x, bs * mbs));
1063   PetscCall(VecRestoreArray(a->slvec0, &from));
1064   PetscCall(VecRestoreArrayRead(xx, &x));
1065 
1066   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1067   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1068   /* supperdiagonal part */
1069   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1070   PetscFunctionReturn(PETSC_SUCCESS);
1071 }
1072 
1073 PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1074 {
1075   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1076   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1077   PetscScalar       *from;
1078   const PetscScalar *x;
1079 
1080   PetscFunctionBegin;
1081   /* diagonal part */
1082   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1083   /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */
1084   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1085   PetscCall(VecZeroEntries(a->slvec1b));
1086 
1087   /* subdiagonal part */
1088   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1089 
1090   /* copy x into the vec slvec0 */
1091   PetscCall(VecGetArray(a->slvec0, &from));
1092   PetscCall(VecGetArrayRead(xx, &x));
1093 
1094   PetscCall(PetscArraycpy(from, x, bs * mbs));
1095   PetscCall(VecRestoreArray(a->slvec0, &from));
1096   PetscCall(VecRestoreArrayRead(xx, &x));
1097 
1098   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1099   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1100   /* supperdiagonal part */
1101   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1102   PetscFunctionReturn(PETSC_SUCCESS);
1103 }
1104 
1105 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
1106 {
1107   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1108   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1109   PetscScalar       *from;
1110   const PetscScalar *x;
1111 
1112   PetscFunctionBegin;
1113   /* diagonal part */
1114   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1115   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1116   PetscCall(VecZeroEntries(a->slvec1b));
1117 
1118   /* subdiagonal part */
1119   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1120   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1121 
1122   /* copy x into the vec slvec0 */
1123   PetscCall(VecGetArray(a->slvec0, &from));
1124   PetscCall(VecGetArrayRead(xx, &x));
1125   PetscCall(PetscArraycpy(from, x, bs * mbs));
1126   PetscCall(VecRestoreArray(a->slvec0, &from));
1127 
1128   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1129   PetscCall(VecRestoreArrayRead(xx, &x));
1130   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1131 
1132   /* supperdiagonal part */
1133   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1134   PetscFunctionReturn(PETSC_SUCCESS);
1135 }
1136 
1137 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1138 {
1139   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1140   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1141   PetscScalar       *from;
1142   const PetscScalar *x;
1143 
1144   PetscFunctionBegin;
1145   /* diagonal part */
1146   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1147   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1148   PetscCall(VecZeroEntries(a->slvec1b));
1149 
1150   /* subdiagonal part */
1151   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1152 
1153   /* copy x into the vec slvec0 */
1154   PetscCall(VecGetArray(a->slvec0, &from));
1155   PetscCall(VecGetArrayRead(xx, &x));
1156   PetscCall(PetscArraycpy(from, x, bs * mbs));
1157   PetscCall(VecRestoreArray(a->slvec0, &from));
1158 
1159   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1160   PetscCall(VecRestoreArrayRead(xx, &x));
1161   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1162 
1163   /* supperdiagonal part */
1164   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1165   PetscFunctionReturn(PETSC_SUCCESS);
1166 }
1167 
1168 /*
1169   This only works correctly for square matrices where the subblock A->A is the
1170    diagonal block
1171 */
1172 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1173 {
1174   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1175 
1176   PetscFunctionBegin;
1177   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1178   PetscCall(MatGetDiagonal(a->A, v));
1179   PetscFunctionReturn(PETSC_SUCCESS);
1180 }
1181 
1182 PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1183 {
1184   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1185 
1186   PetscFunctionBegin;
1187   PetscCall(MatScale(a->A, aa));
1188   PetscCall(MatScale(a->B, aa));
1189   PetscFunctionReturn(PETSC_SUCCESS);
1190 }
1191 
/*
  MatGetRow_MPISBAIJ - returns one locally owned (global) row of the matrix,
  merging the entries of the diagonal block A and the off-diagonal block B so
  the returned column indices are globally numbered and increasing.

  Returned idx/v point into per-matrix scratch buffers; they remain valid
  until MatRestoreRow_MPISBAIJ()/the next assembly.  Only local rows are
  supported.
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt     *cmap, *idx_p, cstart = mat->rstartbs; /* block-column start; equals block-row start for this square symmetric layout */

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
    Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt      max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    /* max is in block rows, so max*bs2 bounds the scalar entries of any point row */
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - brstart; /* local row index */

  /* NULL out the work pointers the caller did not request so the sequential
     getrow routines skip producing them */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark splits B's entries into those whose global block column lies
         before the diagonal block (they go first) and those after (they go last) */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed in the values pass above */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        /* A's column indices are local to the diagonal block: offset by cstart*bs */
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1276 
1277 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1278 {
1279   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1280 
1281   PetscFunctionBegin;
1282   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1283   baij->getrowactive = PETSC_FALSE;
1284   PetscFunctionReturn(PETSC_SUCCESS);
1285 }
1286 
1287 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1288 {
1289   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1290   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1291 
1292   PetscFunctionBegin;
1293   aA->getrow_utriangular = PETSC_TRUE;
1294   PetscFunctionReturn(PETSC_SUCCESS);
1295 }
1296 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1297 {
1298   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1299   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1300 
1301   PetscFunctionBegin;
1302   aA->getrow_utriangular = PETSC_FALSE;
1303   PetscFunctionReturn(PETSC_SUCCESS);
1304 }
1305 
1306 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1307 {
1308   PetscFunctionBegin;
1309   if (PetscDefined(USE_COMPLEX)) {
1310     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
1311 
1312     PetscCall(MatConjugate(a->A));
1313     PetscCall(MatConjugate(a->B));
1314   }
1315   PetscFunctionReturn(PETSC_SUCCESS);
1316 }
1317 
1318 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1319 {
1320   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1321 
1322   PetscFunctionBegin;
1323   PetscCall(MatRealPart(a->A));
1324   PetscCall(MatRealPart(a->B));
1325   PetscFunctionReturn(PETSC_SUCCESS);
1326 }
1327 
1328 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1329 {
1330   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1331 
1332   PetscFunctionBegin;
1333   PetscCall(MatImaginaryPart(a->A));
1334   PetscCall(MatImaginaryPart(a->B));
1335   PetscFunctionReturn(PETSC_SUCCESS);
1336 }
1337 
1338 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1339    Input: isrow       - distributed(parallel),
1340           iscol_local - locally owned (seq)
1341 */
1342 PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1343 {
1344   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
1345   const PetscInt *ptr1, *ptr2;
1346 
1347   PetscFunctionBegin;
1348   PetscCall(ISGetLocalSize(isrow, &sz1));
1349   PetscCall(ISGetLocalSize(iscol_local, &sz2));
1350   if (sz1 > sz2) {
1351     *flg = PETSC_FALSE;
1352     PetscFunctionReturn(PETSC_SUCCESS);
1353   }
1354 
1355   PetscCall(ISGetIndices(isrow, &ptr1));
1356   PetscCall(ISGetIndices(iscol_local, &ptr2));
1357 
1358   PetscCall(PetscMalloc1(sz1, &a1));
1359   PetscCall(PetscMalloc1(sz2, &a2));
1360   PetscCall(PetscArraycpy(a1, ptr1, sz1));
1361   PetscCall(PetscArraycpy(a2, ptr2, sz2));
1362   PetscCall(PetscSortInt(sz1, a1));
1363   PetscCall(PetscSortInt(sz2, a2));
1364 
1365   nmatch = 0;
1366   k      = 0;
1367   for (i = 0; i < sz1; i++) {
1368     for (j = k; j < sz2; j++) {
1369       if (a1[i] == a2[j]) {
1370         k = j;
1371         nmatch++;
1372         break;
1373       }
1374     }
1375   }
1376   PetscCall(ISRestoreIndices(isrow, &ptr1));
1377   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
1378   PetscCall(PetscFree(a1));
1379   PetscCall(PetscFree(a2));
1380   if (nmatch < sz1) {
1381     *flg = PETSC_FALSE;
1382   } else {
1383     *flg = PETSC_TRUE;
1384   }
1385   PetscFunctionReturn(PETSC_SUCCESS);
1386 }
1387 
1388 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1389 {
1390   IS        iscol_local;
1391   PetscInt  csize;
1392   PetscBool isequal;
1393 
1394   PetscFunctionBegin;
1395   PetscCall(ISGetLocalSize(iscol, &csize));
1396   if (call == MAT_REUSE_MATRIX) {
1397     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1398     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1399   } else {
1400     PetscBool issorted;
1401 
1402     PetscCall(ISAllGather(iscol, &iscol_local));
1403     PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
1404     PetscCall(ISSorted(iscol_local, &issorted));
1405     PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
1406   }
1407 
1408   /* now call MatCreateSubMatrix_MPIBAIJ() */
1409   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
1410   if (call == MAT_INITIAL_MATRIX) {
1411     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1412     PetscCall(ISDestroy(&iscol_local));
1413   }
1414   PetscFunctionReturn(PETSC_SUCCESS);
1415 }
1416 
1417 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1418 {
1419   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1420 
1421   PetscFunctionBegin;
1422   PetscCall(MatZeroEntries(l->A));
1423   PetscCall(MatZeroEntries(l->B));
1424   PetscFunctionReturn(PETSC_SUCCESS);
1425 }
1426 
1427 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1428 {
1429   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1430   Mat            A = a->A, B = a->B;
1431   PetscLogDouble isend[5], irecv[5];
1432 
1433   PetscFunctionBegin;
1434   info->block_size = (PetscReal)matin->rmap->bs;
1435 
1436   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1437 
1438   isend[0] = info->nz_used;
1439   isend[1] = info->nz_allocated;
1440   isend[2] = info->nz_unneeded;
1441   isend[3] = info->memory;
1442   isend[4] = info->mallocs;
1443 
1444   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1445 
1446   isend[0] += info->nz_used;
1447   isend[1] += info->nz_allocated;
1448   isend[2] += info->nz_unneeded;
1449   isend[3] += info->memory;
1450   isend[4] += info->mallocs;
1451   if (flag == MAT_LOCAL) {
1452     info->nz_used      = isend[0];
1453     info->nz_allocated = isend[1];
1454     info->nz_unneeded  = isend[2];
1455     info->memory       = isend[3];
1456     info->mallocs      = isend[4];
1457   } else if (flag == MAT_GLOBAL_MAX) {
1458     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1459 
1460     info->nz_used      = irecv[0];
1461     info->nz_allocated = irecv[1];
1462     info->nz_unneeded  = irecv[2];
1463     info->memory       = irecv[3];
1464     info->mallocs      = irecv[4];
1465   } else if (flag == MAT_GLOBAL_SUM) {
1466     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1467 
1468     info->nz_used      = irecv[0];
1469     info->nz_allocated = irecv[1];
1470     info->nz_unneeded  = irecv[2];
1471     info->memory       = irecv[3];
1472     info->mallocs      = irecv[4];
1473   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1474   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1475   info->fill_ratio_needed = 0;
1476   info->factor_mallocs    = 0;
1477   PetscFunctionReturn(PETSC_SUCCESS);
1478 }
1479 
/*
  MatSetOption_MPISBAIJ - handles matrix options, mostly by forwarding them to
  the sequential blocks.  MAT_HERMITIAN and MAT_SYMMETRIC additionally swap
  the parallel mat-vec kernels in complex builds, since the Hermitian and
  symmetric cases need different off-diagonal transpose operations.
*/
PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    /* pattern-related options apply to both sequential blocks */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* need different mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
      /* a complex Hermitian matrix is generally not symmetric, so the
         transpose products are unavailable */
      A->ops->multtranspose    = NULL;
      A->ops->multtransposeadd = NULL;
      A->symmetric             = PETSC_BOOL3_FALSE;
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ;
      A->ops->multadd          = MatMultAdd_MPISBAIJ;
      /* for a symmetric matrix A^T x = A x */
      A->ops->multtranspose    = MatMult_MPISBAIJ;
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
    /* the SBAIJ format is symmetric by construction, so this cannot be turned off */
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_SPD_ETERNAL:
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    /* NOTE(review): sets the same flag as MAT_IGNORE_LOWER_TRIANGULAR —
       confirm this is intended and not a copy-paste of the case above */
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1565 
1566 PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1567 {
1568   PetscFunctionBegin;
1569   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1570   if (reuse == MAT_INITIAL_MATRIX) {
1571     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1572   } else if (reuse == MAT_REUSE_MATRIX) {
1573     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1574   }
1575   PetscFunctionReturn(PETSC_SUCCESS);
1576 }
1577 
1578 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1579 {
1580   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1581   Mat           a = baij->A, b = baij->B;
1582   PetscInt      nv, m, n;
1583   PetscBool     flg;
1584 
1585   PetscFunctionBegin;
1586   if (ll != rr) {
1587     PetscCall(VecEqual(ll, rr, &flg));
1588     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
1589   }
1590   if (!ll) PetscFunctionReturn(PETSC_SUCCESS);
1591 
1592   PetscCall(MatGetLocalSize(mat, &m, &n));
1593   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);
1594 
1595   PetscCall(VecGetLocalSize(rr, &nv));
1596   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
1597 
1598   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1599 
1600   /* left diagonalscale the off-diagonal part */
1601   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1602 
1603   /* scale the diagonal part */
1604   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1605 
1606   /* right diagonalscale the off-diagonal part */
1607   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1608   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1609   PetscFunctionReturn(PETSC_SUCCESS);
1610 }
1611 
1612 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1613 {
1614   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1615 
1616   PetscFunctionBegin;
1617   PetscCall(MatSetUnfactored(a->A));
1618   PetscFunctionReturn(PETSC_SUCCESS);
1619 }
1620 
1621 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1622 
1623 PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1624 {
1625   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1626   Mat           a, b, c, d;
1627   PetscBool     flg;
1628 
1629   PetscFunctionBegin;
1630   a = matA->A;
1631   b = matA->B;
1632   c = matB->A;
1633   d = matB->B;
1634 
1635   PetscCall(MatEqual(a, c, &flg));
1636   if (flg) PetscCall(MatEqual(b, d, &flg));
1637   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1638   PetscFunctionReturn(PETSC_SUCCESS);
1639 }
1640 
1641 PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1642 {
1643   PetscBool isbaij;
1644 
1645   PetscFunctionBegin;
1646   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1647   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1648   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1649   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1650     PetscCall(MatGetRowUpperTriangular(A));
1651     PetscCall(MatCopy_Basic(A, B, str));
1652     PetscCall(MatRestoreRowUpperTriangular(A));
1653   } else {
1654     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1655     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1656 
1657     PetscCall(MatCopy(a->A, b->A, str));
1658     PetscCall(MatCopy(a->B, b->B, str));
1659   }
1660   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1661   PetscFunctionReturn(PETSC_SUCCESS);
1662 }
1663 
/* MatAXPY_MPISBAIJ - Computes Y = a*X + Y.

   Three cases:
   - SAME_NONZERO_PATTERN: the value arrays of X and Y line up entry-for-entry,
     so a single BLAS axpy per local block suffices.
   - SUBSET_NONZERO_PATTERN: defer to the generic MatAXPY_Basic() with X's
     getrow temporarily returning the full (upper-triangular) rows.
   - otherwise: build a new matrix B preallocated for the union of the two
     nonzero patterns, perform the AXPY into it, and replace Y's internals
     with B's via MatHeaderMerge(). */
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
  PetscBLASInt  bnz, one                          = 1;
  Mat_SeqSBAIJ *xa, *ya;
  Mat_SeqBAIJ  *xb, *yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa                = (Mat_SeqSBAIJ *)xx->A->data;
    ya                = (Mat_SeqSBAIJ *)yy->A->data;
    /* diagonal block: ya->a += alpha * xa->a over all stored values */
    PetscCall(PetscBLASIntCast(xa->nz, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
    xb = (Mat_SeqBAIJ *)xx->B->data;
    yb = (Mat_SeqBAIJ *)yy->B->data;
    /* off-diagonal block, same treatment */
    PetscCall(PetscBLASIntCast(xb->nz, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
    PetscCall(MatAXPY_Basic(Y, a, X, str));
    PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
    PetscCall(MatGetRowUpperTriangular(X));
    PetscCall(MatGetRowUpperTriangular(Y));
    /* count the per-block-row nonzeros of the union pattern */
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPISBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* B's guts replace Y's; B itself is destroyed by the merge */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
    PetscCall(MatRestoreRowUpperTriangular(X));
    PetscCall(MatRestoreRowUpperTriangular(Y));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1712 
1713 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1714 {
1715   PetscInt  i;
1716   PetscBool flg;
1717 
1718   PetscFunctionBegin;
1719   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1720   for (i = 0; i < n; i++) {
1721     PetscCall(ISEqual(irow[i], icol[i], &flg));
1722     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1723   }
1724   PetscFunctionReturn(PETSC_SUCCESS);
1725 }
1726 
1727 PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1728 {
1729   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1730   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
1731 
1732   PetscFunctionBegin;
1733   if (!Y->preallocated) {
1734     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
1735   } else if (!aij->nz) {
1736     PetscInt nonew = aij->nonew;
1737     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1738     aij->nonew = nonew;
1739   }
1740   PetscCall(MatShift_Basic(Y, a));
1741   PetscFunctionReturn(PETSC_SUCCESS);
1742 }
1743 
1744 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1745 {
1746   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1747 
1748   PetscFunctionBegin;
1749   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
1750   PetscCall(MatMissingDiagonal(a->A, missing, d));
1751   if (d) {
1752     PetscInt rstart;
1753     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1754     *d += rstart / A->rmap->bs;
1755   }
1756   PetscFunctionReturn(PETSC_SUCCESS);
1757 }
1758 
1759 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1760 {
1761   PetscFunctionBegin;
1762   *a = ((Mat_MPISBAIJ *)A->data)->A;
1763   PetscFunctionReturn(PETSC_SUCCESS);
1764 }
1765 
/* Function table for MATMPISBAIJ. Entries are POSITIONAL: each initializer
   fills the corresponding slot of struct _MatOps, with the interleaved
   numeric comments marking slot indices. NULL means the operation is not
   implemented for this type. Do not reorder. */
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                                       /*  4*/ MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                                       /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                                       /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                                       /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPISBAIJ,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPISBAIJ,
                                       MatCreateSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                                       /* 44*/ NULL,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       MatSetUnfactored_MPISBAIJ,
                                       NULL,
                                       MatSetValuesBlocked_MPISBAIJ,
                                       /* 59*/ MatCreateSubMatrix_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       NULL,
                                       MatConvert_MPISBAIJ_Basic,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPISBAIJ,
                                       /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPISBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPISBAIJ,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};
1918 
1919 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1920 {
1921   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1922   PetscInt      i, mbs, Mbs;
1923   PetscMPIInt   size;
1924 
1925   PetscFunctionBegin;
1926   if (B->hash_active) {
1927     B->ops[0]      = b->cops;
1928     B->hash_active = PETSC_FALSE;
1929   }
1930   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1931   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
1932   PetscCall(PetscLayoutSetUp(B->rmap));
1933   PetscCall(PetscLayoutSetUp(B->cmap));
1934   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1935   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1936   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1937 
1938   mbs = B->rmap->n / bs;
1939   Mbs = B->rmap->N / bs;
1940   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs);
1941 
1942   B->rmap->bs = bs;
1943   b->bs2      = bs * bs;
1944   b->mbs      = mbs;
1945   b->Mbs      = Mbs;
1946   b->nbs      = B->cmap->n / bs;
1947   b->Nbs      = B->cmap->N / bs;
1948 
1949   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1950   b->rstartbs = B->rmap->rstart / bs;
1951   b->rendbs   = B->rmap->rend / bs;
1952 
1953   b->cstartbs = B->cmap->rstart / bs;
1954   b->cendbs   = B->cmap->rend / bs;
1955 
1956 #if defined(PETSC_USE_CTABLE)
1957   PetscCall(PetscHMapIDestroy(&b->colmap));
1958 #else
1959   PetscCall(PetscFree(b->colmap));
1960 #endif
1961   PetscCall(PetscFree(b->garray));
1962   PetscCall(VecDestroy(&b->lvec));
1963   PetscCall(VecScatterDestroy(&b->Mvctx));
1964   PetscCall(VecDestroy(&b->slvec0));
1965   PetscCall(VecDestroy(&b->slvec0b));
1966   PetscCall(VecDestroy(&b->slvec1));
1967   PetscCall(VecDestroy(&b->slvec1a));
1968   PetscCall(VecDestroy(&b->slvec1b));
1969   PetscCall(VecScatterDestroy(&b->sMvctx));
1970 
1971   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1972   PetscCall(MatDestroy(&b->B));
1973   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
1974   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
1975   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1976 
1977   PetscCall(MatDestroy(&b->A));
1978   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
1979   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
1980   PetscCall(MatSetType(b->A, MATSEQSBAIJ));
1981 
1982   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
1983   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
1984 
1985   B->preallocated  = PETSC_TRUE;
1986   B->was_assembled = PETSC_FALSE;
1987   B->assembled     = PETSC_FALSE;
1988   PetscFunctionReturn(PETSC_SUCCESS);
1989 }
1990 
/* MatMPISBAIJSetPreallocationCSR_MPISBAIJ - Preallocates and fills an MPISBAIJ
   matrix from local block-CSR arrays (ii = block-row offsets, jj = global
   block-column indices, V = optional block values).

   First pass over the rows counts, for each local block row i, the entries in
   the strictly-lower triangle (bd, discarded since only the upper triangle is
   stored), the upper-triangular diagonal-portion entries (d) and the remaining
   off-diagonal-portion entries (nz - d - bd), which drive the preallocation.
   Second pass inserts the values with MatSetValuesBlocked_MPISBAIJ(). */
PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cend;
  PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ          = NULL;
  PetscScalar    *values      = NULL;
  PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  /* all in units of blocks: local block rows, first owned block row, end of owned block columns */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
    JJ = jj + ii[i];
    bd = 0;
    /* bd = entries strictly below the diagonal (columns assumed sorted ascending) */
    for (j = 0; j < nz; j++) {
      if (*JJ >= i + rstart) break;
      JJ++;
      bd++;
    }
    d = 0;
    /* d = upper-triangular entries inside the diagonal portion (column < cend) */
    for (; j < nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d - bd;
    nz       = nz - bd;
    nz_max   = PetscMax(nz_max, nz); /* widest row after dropping the lower triangle */
  }
  PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* with no user values, insert explicit zeros from a scratch buffer */
  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* all input was local, so skip the off-process stash communication during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2066 
2067 /*MC
2068    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2069    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2070    the matrix is stored.
2071 
2072    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2073    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
2074 
2075    Options Database Key:
2076 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2077 
2078    Level: beginner
2079 
2080    Note:
2081      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
     diagonal portion of the matrix of each process has to be less than or equal to the number of columns.
2083 
2084 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2085 M*/
2086 
/* MatCreate_MPISBAIJ - Constructor registered for MATMPISBAIJ.

   Allocates the Mat_MPISBAIJ data structure, installs the function table,
   initializes all cached fields to empty, composes the type-specific
   query/conversion functions, marks the matrix symmetric (Hermitian only for
   real scalars), and processes the -mat_use_hash_table option. */
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ *b;
  PetscBool     flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data   = (void *)b;
  B->ops[0] = MatOps_Values;

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec    = NULL;
  b->Mvctx   = NULL;
  b->slvec0  = NULL;
  b->slvec0b = NULL;
  b->slvec1  = NULL;
  b->slvec1a = NULL;
  b->slvec1b = NULL;
  b->sMvctx  = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = NULL;
  b->v_loc  = NULL;
  b->n_loc  = 0;

  /* type-specific entry points queried via PetscObjectQueryFunction() */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));

  /* symmetric storage: symmetry holds by construction and never expires */
  B->symmetric                   = PETSC_BOOL3_TRUE;
  B->structurally_symmetric      = PETSC_BOOL3_TRUE;
  B->symmetry_eternal            = PETSC_TRUE;
  B->structural_symmetry_eternal = PETSC_TRUE;
#if defined(PETSC_USE_COMPLEX)
  /* complex symmetric is not Hermitian unless MAT_HERMITIAN is set explicitly */
  B->hermitian = PETSC_BOOL3_FALSE;
#else
  B->hermitian = PETSC_BOOL3_TRUE;
#endif

  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39; /* reject degenerate load factors */
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
2187 
2188 /*MC
2189    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2190 
2191    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2192    and `MATMPISBAIJ` otherwise.
2193 
2194    Options Database Key:
2195 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2196 
2197   Level: beginner
2198 
2199 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2200 M*/
2201 
2202 /*@C
2203    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2204    the user should preallocate the matrix storage by setting the parameters
2205    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2206    performance can be increased by more than a factor of 50.
2207 
2208    Collective
2209 
2210    Input Parameters:
2211 +  B - the matrix
2212 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2213           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2214 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2215            submatrix  (same for all local rows)
2216 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
           submatrix (possibly different for each block row) or `NULL`.  If you plan to factor the matrix you must leave room
2219            for the diagonal entry and set a value even if it is zero.
2220 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2221            submatrix (same for all local rows).
2222 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2223            off-diagonal portion of the local submatrix that is right of the diagonal
2224            (possibly different for each block row) or `NULL`.
2225 
2226    Options Database Keys:
2227 +   -mat_no_unroll - uses code that does not unroll the loops in the
2228                      block calculations (much slower)
2229 -   -mat_block_size - size of the blocks to use
2230 
2231    Level: intermediate
2232 
2233    Notes:
2234 
   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2237 
2238    If the *_nnz parameter is given then the *_nz parameter is ignored
2239 
2240    Storage Information:
2241    For a square global matrix we define each processor's diagonal portion
2242    to be its local rows and the corresponding columns (a square submatrix);
2243    each processor's off-diagonal portion encompasses the remainder of the
2244    local matrix (a rectangular submatrix).
2245 
2246    The user can specify preallocated storage for the diagonal part of
2247    the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
2248    `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2249    memory allocation.  Likewise, specify preallocated storage for the
2250    off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2251 
2252    You can call `MatGetInfo()` to get information on how effective the preallocation was;
2253    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2254    You can also run with the option `-info` and look for messages with the string
2255    malloc in them to see if additional memory allocation was needed.
2256 
2257    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2258    the figure below we depict these three local rows and all columns (0-11).
2259 
2260 .vb
2261            0 1 2 3 4 5 6 7 8 9 10 11
2262           --------------------------
2263    row 3  |. . . d d d o o o o  o  o
2264    row 4  |. . . d d d o o o o  o  o
2265    row 5  |. . . d d d o o o o  o  o
2266           --------------------------
2267 .ve
2268 
2269    Thus, any entries in the d locations are stored in the d (diagonal)
2270    submatrix, and any entries in the o locations are stored in the
2271    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2272    `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2273 
2274    Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2275    plus the diagonal part of the d matrix,
2276    and `o_nz` should indicate the number of block nonzeros per row in the o matrix
2277 
2278    In general, for PDE problems in which most nonzeros are near the diagonal,
2279    one expects `d_nz` >> `o_nz`.
2280 
2281 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2282 @*/
PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* validate before dispatching: B must be a valid, typed Mat and bs must be logically collective */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2); /* all ranks must pass the same block size */
  /* dispatch to the type-specific implementation registered under "MatMPISBAIJSetPreallocation_C";
     a no-op when the matrix type has not registered one */
  PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2292 
2293 /*@C
2294    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2295    (block compressed row).  For good matrix assembly performance
2296    the user should preallocate the matrix storage by setting the parameters
2297    `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
2298 
2299    Collective
2300 
2301    Input Parameters:
2302 +  comm - MPI communicator
2303 .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2304           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2305 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
2306            This value should be the same as the local size used in creating the
2307            y vector for the matrix-vector product y = Ax.
2308 .  n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
2309            This value should be the same as the local size used in creating the
2310            x vector for the matrix-vector product y = Ax.
2311 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2312 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2313 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2314            submatrix (same for all local rows)
2315 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local submatrix
           (possibly different for each block row) or `NULL`.
2318            If you plan to factor the matrix you must leave room for the diagonal entry and
2319            set its value even if it is zero.
2320 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2321            submatrix (same for all local rows).
2322 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2323            off-diagonal portion of the local submatrix (possibly different for
2324            each block row) or `NULL`.
2325 
2326    Output Parameter:
2327 .  A - the matrix
2328 
2329    Options Database Keys:
2330 +   -mat_no_unroll - uses code that does not unroll the loops in the
2331                      block calculations (much slower)
2332 .   -mat_block_size - size of the blocks to use
2333 -   -mat_mpi - use the parallel matrix data structures even on one processor
2334                (defaults to using SeqBAIJ format on one processor)
2335 
2336    Level: intermediate
2337 
2338    Notes:
2339    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2340    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2341    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2342 
2343    The number of rows and columns must be divisible by blocksize.
2344    This matrix type does not support complex Hermitian operation.
2345 
2346    The user MUST specify either the local or global matrix dimensions
2347    (possibly both).
2348 
   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2351 
2352    If the *_nnz parameter is given then the *_nz parameter is ignored
2353 
2354    Storage Information:
2355    For a square global matrix we define each processor's diagonal portion
2356    to be its local rows and the corresponding columns (a square submatrix);
2357    each processor's off-diagonal portion encompasses the remainder of the
2358    local matrix (a rectangular submatrix).
2359 
2360    The user can specify preallocated storage for the diagonal part of
2361    the local submatrix with either `d_nz` or `d_nnz` (not both). Set
2362    `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2363    memory allocation. Likewise, specify preallocated storage for the
2364    off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2365 
2366    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2367    the figure below we depict these three local rows and all columns (0-11).
2368 
2369 .vb
2370            0 1 2 3 4 5 6 7 8 9 10 11
2371           --------------------------
2372    row 3  |. . . d d d o o o o  o  o
2373    row 4  |. . . d d d o o o o  o  o
2374    row 5  |. . . d d d o o o o  o  o
2375           --------------------------
2376 .ve
2377 
2378    Thus, any entries in the d locations are stored in the d (diagonal)
2379    submatrix, and any entries in the o locations are stored in the
2380    o (off-diagonal) submatrix. Note that the d matrix is stored in
2381    `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2382 
2383    Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2384    plus the diagonal part of the d matrix,
2385    and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2386    In general, for PDE problems in which most nonzeros are near the diagonal,
2387    one expects `d_nz` >> `o_nz`.
2388 
2389 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2390 @*/
2391 
2392 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2393 {
2394   PetscMPIInt size;
2395 
2396   PetscFunctionBegin;
2397   PetscCall(MatCreate(comm, A));
2398   PetscCall(MatSetSizes(*A, m, n, M, N));
2399   PetscCallMPI(MPI_Comm_size(comm, &size));
2400   if (size > 1) {
2401     PetscCall(MatSetType(*A, MATMPISBAIJ));
2402     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2403   } else {
2404     PetscCall(MatSetType(*A, MATSEQSBAIJ));
2405     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2406   }
2407   PetscFunctionReturn(PETSC_SUCCESS);
2408 }
2409 
/* Create a duplicate of an MPISBAIJ matrix.  The new matrix references matin's
   row/column layouts, copies all block-size and ownership metadata, rebuilds the
   communication work vectors, and duplicates the sequential diagonal (A) and
   off-diagonal (B) blocks; whether numerical values are copied is controlled by
   cpvalues. */
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
  PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
  PetscScalar  *array;

  PetscFunctionBegin;
  *newmat = NULL;

  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  /* the duplicate starts out already assembled and preallocated */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy block-count and ownership metadata */
  a      = (Mat_MPISBAIJ *)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: only the configuration is copied, the table itself is rebuilt lazily */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
  /* duplicate the global-to-local column map if the old matrix built one */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  /* copy the list of global block columns present in the off-diagonal block B */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

  PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
  PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));

  /* slvec1a aliases the first bs*mbs entries of slvec1's array, slvec1b the
     remainder; slvec0b likewise aliases the tail of slvec0's array */
  PetscCall(VecGetLocalSize(a->slvec1, &nt));
  PetscCall(VecGetArray(a->slvec1, &array));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
  PetscCall(VecRestoreArray(a->slvec1, &array));
  PetscCall(VecGetArray(a->slvec0, &array));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
  PetscCall(VecRestoreArray(a->slvec0, &array));

  /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick:
     share the old scatter by taking an extra reference instead of copying it */
  PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
  a->sMvctx = oldmat->sMvctx;

  /* duplicate the sequential diagonal and off-diagonal blocks (values per cpvalues) */
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2501 
2502 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2503 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2504 
2505 PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2506 {
2507   PetscBool isbinary;
2508 
2509   PetscFunctionBegin;
2510   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2511   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2512   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2513   PetscFunctionReturn(PETSC_SUCCESS);
2514 }
2515 
/*
  MatGetRowMaxAbs_MPISBAIJ - computes v[row] = max_col |A[row,col]| for each local row.

  Only the upper triangle of the symmetric matrix is stored, so each entry of the
  off-diagonal block B also stands for the transposed entry whose row lives on a
  higher-ranked process.  Those column-wise maxima are accumulated in `work` and
  forwarded to the owning processes with point-to-point MPI messages.

  Returning the column index of the maximum (idx) is not implemented.
*/
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
  PetscReal     atmp;
  PetscReal    *work, *svalues, *rvalues;
  PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
  PetscMPIInt   rank, size;
  PetscInt     *rowners_bs, dest, count, source;
  PetscScalar  *va;
  MatScalar    *ba;
  MPI_Status    stat;

  PetscFunctionBegin;
  /* idx output is unsupported */
  PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
  PetscCall(MatGetRowMaxAbs(a->A, v, NULL)); /* start from the row maxima of the local diagonal block */
  PetscCall(VecGetArray(v, &va));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a; /* raw CSR-of-blocks arrays of the off-diagonal block B */
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  PetscCall(PetscCalloc1(bs * Mbs, &work));

  /* row_max for B; the last rank sends nothing -- NOTE(review): assumes its B block is empty, confirm */
  if (rank != size - 1) {
    for (i = 0; i < mbs; i++) {
      ncols = bi[1] - bi[0];
      bi++;
      brow = bs * i;
      for (j = 0; j < ncols; j++) {
        bcol = bs * (*bj);
        for (kcol = 0; kcol < bs; kcol++) {
          col = bcol + kcol;           /* local col index */
          col += rowners_bs[rank + 1]; /* global col index -- NOTE(review): assumes B's column indices are offsets from the end of this rank's ownership range; confirm */
          for (krow = 0; krow < bs; krow++) {
            atmp = PetscAbsScalar(*ba);
            ba++;
            row = brow + krow; /* local row index */
            /* an upper-triangle entry updates its own row max and, by symmetry, the max of its column */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    for (dest = rank + 1; dest < size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest + 1] - rowners_bs[dest];
      PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
    }
  }

  /* receive values: one message from every lower rank; arrival order is irrelevant since
     contributions are combined with max */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank + 1] - rowners_bs[rank];
    for (source = 0; source < rank; source++) {
      PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
      /* process values */
      for (i = 0; i < count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  PetscCall(VecRestoreArray(v, &va));
  PetscCall(PetscFree(work));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2598 
/*
  MatSOR_MPISBAIJ - SOR/SSOR relaxation for parallel SBAIJ matrices (block size 1 only).

  Only the upper triangle of the symmetric matrix is stored, so off-processor
  contributions (B*x and B^T*x) are assembled through the slvec0/slvec1 work
  vectors and the sMvctx scatter before each local sweep on the diagonal block
  mat->A.
*/
PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
  PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
  PetscScalar       *x, *ptr, *from;
  Vec                bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    /* delegate directly to the local diagonal block */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with x = 0 there are no off-processor contributions yet: do one purely local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
      its--;
    }

    PetscCall(VecDuplicate(bb, &bb1));
    while (its--) {
      /* lower triangular part: slvec0b = - B^T*xx */
      PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));

      /* copy xx into slvec0a */
      PetscCall(VecGetArray(mat->slvec0, &ptr));
      PetscCall(VecGetArray(xx, &x));
      PetscCall(PetscArraycpy(ptr, x, bs * mbs));
      PetscCall(VecRestoreArray(mat->slvec0, &ptr));

      /* negate so the scatter-add below subtracts: slvec0 = -[x; B^T*x] */
      PetscCall(VecScale(mat->slvec0, -1.0));

      /* copy bb into slvec1a */
      PetscCall(VecGetArray(mat->slvec1, &ptr));
      PetscCall(VecGetArrayRead(bb, &b));
      PetscCall(PetscArraycpy(ptr, b, bs * mbs));
      PetscCall(VecRestoreArray(mat->slvec1, &ptr));

      /* set slvec1b = 0 (state bump because slvec1b aliases slvec1's array) */
      PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
      PetscCall(VecZeroEntries(mat->slvec1b));

      /* scatter-add -x / -B^T*x across processes into slvec1 */
      PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
      PetscCall(VecRestoreArray(xx, &x));
      PetscCall(VecRestoreArrayRead(bb, &b));
      PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));

      /* upper triangular part: bb1 = slvec1a + B*slvec1b, i.e. bb with the
         off-processor (upper and lower triangle) contributions removed */
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));

      /* local diagonal sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
    }
    PetscCall(VecDestroy(&bb1));
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
  } else if (flag & SOR_EISENSTAT) {
    Vec                xx1;
    PetscBool          hasop;
    const PetscScalar *diag;
    PetscScalar       *sl, scale = (omega - 2.0) / omega;
    PetscInt           i, n;

    /* lazily created work vectors cached on the matrix */
    if (!mat->xx1) {
      PetscCall(VecDuplicate(bb, &mat->xx1));
      PetscCall(VecDuplicate(bb, &mat->bb1));
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));

    if (hasop) {
      /* NOTE(review): bb1 filled here is never read below, while slvec1a is scaled
         without having been filled first -- inconsistent with the else branch; confirm intended */
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
      PetscCall(VecAYPX(mat->slvec1a, scale, bb));
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
      PetscCall(VecAYPX(mat->slvec1a,scale,bb));
      */
      PetscCall(VecGetArray(mat->slvec1a, &sl));
      PetscCall(VecGetArrayRead(mat->diag, &diag));
      PetscCall(VecGetArrayRead(bb, &b));
      PetscCall(VecGetArray(xx, &x));
      PetscCall(VecGetLocalSize(xx, &n));
      if (omega == 1.0) {
        /* sl = bb - D*xx (scale == -1 when omega == 1) */
        for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
        PetscCall(PetscLogFlops(2.0 * n));
      } else {
        /* sl = bb + ((omega-2)/omega)*D*xx */
        for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
        PetscCall(PetscLogFlops(3.0 * n));
      }
      PetscCall(VecRestoreArray(mat->slvec1a, &sl));
      PetscCall(VecRestoreArrayRead(mat->diag, &diag));
      PetscCall(VecRestoreArrayRead(bb, &b));
      PetscCall(VecRestoreArray(xx, &x));
    }

    /* multiply off-diagonal portion of matrix */
    PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
    PetscCall(VecZeroEntries(mat->slvec1b));
    PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
    PetscCall(VecGetArray(mat->slvec0, &from));
    PetscCall(VecGetArray(xx, &x));
    PetscCall(PetscArraycpy(from, x, bs * mbs));
    PetscCall(VecRestoreArray(mat->slvec0, &from));
    PetscCall(VecRestoreArray(xx, &x));
    PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
    PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(PETSC_SUCCESS);
}
2730 
2731 /*@
2732      MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard
2733          CSR format the local rows.
2734 
2735    Collective
2736 
2737    Input Parameters:
2738 +  comm - MPI communicator
2739 .  bs - the block size, only a block size of 1 is supported
2740 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
2741 .  n - This value should be the same as the local size used in creating the
2742        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
2743        calculated if `N` is given) For square matrices `n` is almost always `m`.
2744 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2745 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2746 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2747 .   j - column indices
2748 -   a - matrix values
2749 
2750    Output Parameter:
2751 .   mat - the matrix
2752 
2753    Level: intermediate
2754 
2755    Notes:
2756        The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
2757      thus you CANNOT change the matrix entries by changing the values of `a` after you have
2758      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
2759 
2760        The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
2761 
2762 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2763           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2764 @*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* the CSR row pointer must be zero-based and the local row count must be explicit */
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPISBAIJ));
  /* copies i, j, a into the matrix; entries below the diagonal are ignored */
  PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2776 
2777 /*@C
2778    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2779 
2780    Collective
2781 
2782    Input Parameters:
2783 +  B - the matrix
2784 .  bs - the block size
2785 .  i - the indices into `j` for the start of each local row (starts with zero)
2786 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2787 -  v - optional values in the matrix
2788 
2789    Level: advanced
2790 
2791    Notes:
2792    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
2793    and usually the numerical values as well
2794 
2795    Any entries below the diagonal are ignored
2796 
2797 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2798 @*/
2799 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2800 {
2801   PetscFunctionBegin;
2802   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2803   PetscFunctionReturn(PETSC_SUCCESS);
2804 }
2805 
2806 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2807 {
2808   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
2809   PetscInt    *indx;
2810   PetscScalar *values;
2811 
2812   PetscFunctionBegin;
2813   PetscCall(MatGetSize(inmat, &m, &N));
2814   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2815     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2816     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
2817     PetscInt     *bindx, rmax = a->rmax, j;
2818     PetscMPIInt   rank, size;
2819 
2820     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2821     mbs = m / bs;
2822     Nbs = N / cbs;
2823     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2824     nbs = n / cbs;
2825 
2826     PetscCall(PetscMalloc1(rmax, &bindx));
2827     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2828 
2829     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2830     PetscCallMPI(MPI_Comm_rank(comm, &size));
2831     if (rank == size - 1) {
2832       /* Check sum(nbs) = Nbs */
2833       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2834     }
2835 
2836     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2837     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2838     for (i = 0; i < mbs; i++) {
2839       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
2840       nnz = nnz / bs;
2841       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2842       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
2843       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
2844     }
2845     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2846     PetscCall(PetscFree(bindx));
2847 
2848     PetscCall(MatCreate(comm, outmat));
2849     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
2850     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
2851     PetscCall(MatSetType(*outmat, MATSBAIJ));
2852     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
2853     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2854     MatPreallocateEnd(dnz, onz);
2855   }
2856 
2857   /* numeric phase */
2858   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2859   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
2860 
2861   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2862   for (i = 0; i < m; i++) {
2863     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2864     Ii = i + rstart;
2865     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2866     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2867   }
2868   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2869   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
2870   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
2871   PetscFunctionReturn(PETSC_SUCCESS);
2872 }
2873