xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision e0b7e82fd3cf27fce84cc3e37e8d70a5c36a2d4e)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscblaslapack.h>
5 
6 static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
7 {
8   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
9 
10   PetscFunctionBegin;
11   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
12   PetscCall(MatStashDestroy_Private(&mat->stash));
13   PetscCall(MatStashDestroy_Private(&mat->bstash));
14   PetscCall(MatDestroy(&baij->A));
15   PetscCall(MatDestroy(&baij->B));
16 #if defined(PETSC_USE_CTABLE)
17   PetscCall(PetscHMapIDestroy(&baij->colmap));
18 #else
19   PetscCall(PetscFree(baij->colmap));
20 #endif
21   PetscCall(PetscFree(baij->garray));
22   PetscCall(VecDestroy(&baij->lvec));
23   PetscCall(VecScatterDestroy(&baij->Mvctx));
24   PetscCall(VecDestroy(&baij->slvec0));
25   PetscCall(VecDestroy(&baij->slvec0b));
26   PetscCall(VecDestroy(&baij->slvec1));
27   PetscCall(VecDestroy(&baij->slvec1a));
28   PetscCall(VecDestroy(&baij->slvec1b));
29   PetscCall(VecScatterDestroy(&baij->sMvctx));
30   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
31   PetscCall(PetscFree(baij->barray));
32   PetscCall(PetscFree(baij->hd));
33   PetscCall(VecDestroy(&baij->diag));
34   PetscCall(VecDestroy(&baij->bb1));
35   PetscCall(VecDestroy(&baij->xx1));
36 #if defined(PETSC_USE_REAL_MAT_SINGLE)
37   PetscCall(PetscFree(baij->setvaluescopy));
38 #endif
39   PetscCall(PetscFree(baij->in_loc));
40   PetscCall(PetscFree(baij->v_loc));
41   PetscCall(PetscFree(baij->rangebs));
42   PetscCall(PetscFree(mat->data));
43 
44   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
45   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
47   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
48   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
49 #if defined(PETSC_HAVE_ELEMENTAL)
50   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
51 #endif
52 #if defined(PETSC_HAVE_SCALAPACK)
53   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
54 #endif
55   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
57   PetscFunctionReturn(PETSC_SUCCESS);
58 }
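
/* Usage sketch (not part of this file; sizes and preallocation figures are
   illustrative assumptions): the typical lifecycle whose teardown ends in
   MatDestroy_MPISBAIJ() above.

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
     PetscCall(MatSetType(A, MATMPISBAIJ));
     PetscCall(MatMPISBAIJSetPreallocation(A, 2, 5, NULL, 2, NULL));
     ... MatSetValues() / MatAssemblyBegin() / MatAssemblyEnd() / solves ...
     PetscCall(MatDestroy(&A));

   MatDestroy() dispatches here, freeing the local diagonal block A, the
   off-diagonal block B, both stashes, and the scatter contexts. */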
59 
60 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
61 #define TYPE SBAIJ
62 #define TYPE_SBAIJ
63 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
64 #undef TYPE
65 #undef TYPE_SBAIJ
66 
67 #if defined(PETSC_HAVE_ELEMENTAL)
68 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
69 #endif
70 #if defined(PETSC_HAVE_SCALAPACK)
71 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
72 #endif
73 
74 /* This could be moved to matimpl.h */
75 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
76 {
77   Mat       preallocator;
78   PetscInt  r, rstart, rend;
79   PetscInt  bs, i, m, n, M, N;
80   PetscBool cong = PETSC_TRUE;
81 
82   PetscFunctionBegin;
83   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
84   PetscValidLogicalCollectiveInt(B, nm, 2);
85   for (i = 0; i < nm; i++) {
86     PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
87     PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
88     PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
89   }
90   PetscValidLogicalCollectiveBool(B, fill, 5);
91   PetscCall(MatGetBlockSize(B, &bs));
92   PetscCall(MatGetSize(B, &M, &N));
93   PetscCall(MatGetLocalSize(B, &m, &n));
94   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
95   PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
96   PetscCall(MatSetBlockSize(preallocator, bs));
97   PetscCall(MatSetSizes(preallocator, m, n, M, N));
98   PetscCall(MatSetUp(preallocator));
99   PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
100   for (r = rstart; r < rend; ++r) {
101     PetscInt           ncols;
102     const PetscInt    *row;
103     const PetscScalar *vals;
104 
105     for (i = 0; i < nm; i++) {
106       PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
107       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
108       if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
109       PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
110     }
111   }
112   PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
113   PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
114   PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
115   PetscCall(MatDestroy(&preallocator));
116   PetscFunctionReturn(PETSC_SUCCESS);
117 }
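
/* Caller-side sketch of the MATPREALLOCATOR idiom used above (assumed driver
   code; m, n, M, N as in the routine): the insertion loop is effectively run
   twice, once into the preallocator and once into the real matrix, so the
   second pass triggers no mallocs.

     Mat pre;
     PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &pre));
     PetscCall(MatSetType(pre, MATPREALLOCATOR));
     PetscCall(MatSetSizes(pre, m, n, M, N));
     PetscCall(MatSetUp(pre));
     ... the same MatSetValues() calls later made on B ...
     PetscCall(MatAssemblyBegin(pre, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(pre, MAT_FINAL_ASSEMBLY));
     PetscCall(MatPreallocatorPreallocate(pre, PETSC_TRUE, B));
     PetscCall(MatDestroy(&pre));

   The symm[] flags above additionally mirror each (r,c) entry to (c,r), so a
   symmetric-storage source preallocates both triangles of the target. */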
118 
119 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
120 {
121   Mat      B;
122   PetscInt r;
123 
124   PetscFunctionBegin;
125   if (reuse != MAT_REUSE_MATRIX) {
126     PetscBool symm = PETSC_TRUE, isdense;
127     PetscInt  bs;
128 
129     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
130     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
131     PetscCall(MatSetType(B, newtype));
132     PetscCall(MatGetBlockSize(A, &bs));
133     PetscCall(MatSetBlockSize(B, bs));
134     PetscCall(PetscLayoutSetUp(B->rmap));
135     PetscCall(PetscLayoutSetUp(B->cmap));
136     PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
137     if (!isdense) {
138       PetscCall(MatGetRowUpperTriangular(A));
139       PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
140       PetscCall(MatRestoreRowUpperTriangular(A));
141     } else {
142       PetscCall(MatSetUp(B));
143     }
144   } else {
145     B = *newmat;
146     PetscCall(MatZeroEntries(B));
147   }
148 
149   PetscCall(MatGetRowUpperTriangular(A));
150   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
151     PetscInt           ncols;
152     const PetscInt    *row;
153     const PetscScalar *vals;
154 
155     PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
156     PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
157 #if defined(PETSC_USE_COMPLEX)
158     if (A->hermitian == PETSC_BOOL3_TRUE) {
159       PetscInt i;
160       for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
161     } else {
162       PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
163     }
164 #else
165     PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
166 #endif
167     PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
168   }
169   PetscCall(MatRestoreRowUpperTriangular(A));
170   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
171   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
172 
173   if (reuse == MAT_INPLACE_MATRIX) {
174     PetscCall(MatHeaderReplace(A, &B));
175   } else {
176     *newmat = B;
177   }
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
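
/* Caller-side sketch (assumed usage): this basic fallback serves, e.g.,
   MatConvert() to a full-storage type when no specialized conversion is
   registered; the stored upper triangle is expanded into both triangles.

     Mat B;
     PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
     ...
     PetscCall(MatConvert(A, MATMPIAIJ, MAT_INPLACE_MATRIX, &A));

   With MAT_INPLACE_MATRIX the converted matrix replaces A itself through
   MatHeaderReplace(), as in the final branch above. */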
180 
181 static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
182 {
183   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
184 
185   PetscFunctionBegin;
186   PetscCall(MatStoreValues(aij->A));
187   PetscCall(MatStoreValues(aij->B));
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
192 {
193   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
194 
195   PetscFunctionBegin;
196   PetscCall(MatRetrieveValues(aij->A));
197   PetscCall(MatRetrieveValues(aij->B));
198   PetscFunctionReturn(PETSC_SUCCESS);
199 }
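
/* Usage sketch (assumed solver loop): with the nonzero pattern frozen, the
   numerical values can be checkpointed and later restored, which is what the
   two wrappers above forward to the A and B blocks.

     PetscCall(MatSetOption(mat, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE));
     PetscCall(MatStoreValues(mat));
     ... overwrite the values ...
     PetscCall(MatRetrieveValues(mat));
*/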
200 
201 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
202   do { \
203     brow = row / bs; \
204     rp   = aj + ai[brow]; \
205     ap   = aa + bs2 * ai[brow]; \
206     rmax = aimax[brow]; \
207     nrow = ailen[brow]; \
208     bcol = col / bs; \
209     ridx = row % bs; \
210     cidx = col % bs; \
211     low  = 0; \
212     high = nrow; \
213     while (high - low > 3) { \
214       t = (low + high) / 2; \
215       if (rp[t] > bcol) high = t; \
216       else low = t; \
217     } \
218     for (_i = low; _i < high; _i++) { \
219       if (rp[_i] > bcol) break; \
220       if (rp[_i] == bcol) { \
221         bap = ap + bs2 * _i + bs * cidx + ridx; \
222         if (addv == ADD_VALUES) *bap += value; \
223         else *bap = value; \
224         goto a_noinsert; \
225       } \
226     } \
227     if (a->nonew == 1) goto a_noinsert; \
228     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
229     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
230     N = nrow++ - 1; \
231     /* shift up all the later entries in this row */ \
232     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
233     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
234     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
235     rp[_i]                          = bcol; \
236     ap[bs2 * _i + bs * cidx + ridx] = value; \
237     A->nonzerostate++; \
238   a_noinsert:; \
239     ailen[brow] = nrow; \
240   } while (0)
241 
242 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
243   do { \
244     brow = row / bs; \
245     rp   = bj + bi[brow]; \
246     ap   = ba + bs2 * bi[brow]; \
247     rmax = bimax[brow]; \
248     nrow = bilen[brow]; \
249     bcol = col / bs; \
250     ridx = row % bs; \
251     cidx = col % bs; \
252     low  = 0; \
253     high = nrow; \
254     while (high - low > 3) { \
255       t = (low + high) / 2; \
256       if (rp[t] > bcol) high = t; \
257       else low = t; \
258     } \
259     for (_i = low; _i < high; _i++) { \
260       if (rp[_i] > bcol) break; \
261       if (rp[_i] == bcol) { \
262         bap = ap + bs2 * _i + bs * cidx + ridx; \
263         if (addv == ADD_VALUES) *bap += value; \
264         else *bap = value; \
265         goto b_noinsert; \
266       } \
267     } \
268     if (b->nonew == 1) goto b_noinsert; \
269     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
270     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
271     N = nrow++ - 1; \
272     /* shift up all the later entries in this row */ \
273     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
274     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
275     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
276     rp[_i]                          = bcol; \
277     ap[bs2 * _i + bs * cidx + ridx] = value; \
278     B->nonzerostate++; \
279   b_noinsert:; \
280     bilen[brow] = nrow; \
281   } while (0)
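
/* Worked example of the addressing used by the two macros above (illustrative
   numbers): with bs = 2 (so bs2 = 4), the scalar entry at local row 5, column 8
   lies in block row 5/2 = 2 and block column 8/2 = 4, at intra-block offsets
   ridx = 5%2 = 1 and cidx = 8%2 = 0; once the binary search locates that block
   at position _i within the row, the value is stored column-major within the
   block, at ap[bs2*_i + bs*cidx + ridx] = ap[4*_i + 1]. */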
282 
283 /* Only add/insert a(i,j) with i<=j (block indices).
284    Any a(i,j) with i>j supplied by the user is either ignored or generates an error
285 */
286 static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
287 {
288   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
289   MatScalar     value;
290   PetscBool     roworiented = baij->roworiented;
291   PetscInt      i, j, row, col;
292   PetscInt      rstart_orig = mat->rmap->rstart;
293   PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
294   PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
295 
296   /* Some variables required by the macros above */
297   Mat           A     = baij->A;
298   Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
299   PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
300   MatScalar    *aa = a->a;
301 
302   Mat          B     = baij->B;
303   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
304   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
305   MatScalar   *ba = b->a;
306 
307   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
308   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
309   MatScalar *ap, *bap;
310 
311   /* for stash */
312   PetscInt   n_loc, *in_loc = NULL;
313   MatScalar *v_loc = NULL;
314 
315   PetscFunctionBegin;
316   if (!baij->donotstash) {
317     if (n > baij->n_loc) {
318       PetscCall(PetscFree(baij->in_loc));
319       PetscCall(PetscFree(baij->v_loc));
320       PetscCall(PetscMalloc1(n, &baij->in_loc));
321       PetscCall(PetscMalloc1(n, &baij->v_loc));
322 
323       baij->n_loc = n;
324     }
325     in_loc = baij->in_loc;
326     v_loc  = baij->v_loc;
327   }
328 
329   for (i = 0; i < m; i++) {
330     if (im[i] < 0) continue;
331     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
332     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
333       row = im[i] - rstart_orig;                     /* local row index */
334       for (j = 0; j < n; j++) {
335         if (im[i] / bs > in[j] / bs) {
336           if (a->ignore_ltriangular) {
337             continue; /* ignore lower triangular blocks */
338         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
339         }
340         if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
341           col  = in[j] - cstart_orig;                    /* local col index */
342           brow = row / bs;
343           bcol = col / bs;
344           if (brow > bcol) continue; /* ignore lower triangular blocks of A */
345           if (roworiented) value = v[i * n + j];
346           else value = v[i + j * m];
347           MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
348           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
349         } else if (in[j] < 0) {
350           continue;
351         } else {
352           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
353           /* off-diag entry (B) */
354           if (mat->was_assembled) {
355             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
356 #if defined(PETSC_USE_CTABLE)
357             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
358             col = col - 1;
359 #else
360             col = baij->colmap[in[j] / bs] - 1;
361 #endif
362             if (col < 0 && !((Mat_SeqSBAIJ *)baij->A->data)->nonew) {
363               PetscCall(MatDisAssemble_MPISBAIJ(mat));
364               col = in[j];
365           /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
366               B     = baij->B;
367               b     = (Mat_SeqBAIJ *)(B)->data;
368               bimax = b->imax;
369               bi    = b->i;
370               bilen = b->ilen;
371               bj    = b->j;
372               ba    = b->a;
373             } else col += in[j] % bs;
374           } else col = in[j];
375           if (roworiented) value = v[i * n + j];
376           else value = v[i + j * m];
377           MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
378           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
379         }
380       }
381     } else { /* off processor entry */
382       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
383       if (!baij->donotstash) {
384         mat->assembled = PETSC_FALSE;
385         n_loc          = 0;
386         for (j = 0; j < n; j++) {
387           if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
388           in_loc[n_loc] = in[j];
389           if (roworiented) {
390             v_loc[n_loc] = v[i * n + j];
391           } else {
392             v_loc[n_loc] = v[j * m + i];
393           }
394           n_loc++;
395         }
396         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
397       }
398     }
399   }
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
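
/* Caller-side sketch (illustrative indices): only entries whose block row is
   <= the block column are kept; lower-triangular input either errors or, with
   the option below, is silently dropped, matching the checks above.

     PetscInt    i = 3, j = 7;
     PetscScalar v = 1.0;
     PetscCall(MatSetOption(mat, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
     PetscCall(MatSetValues(mat, 1, &i, 1, &j, &v, ADD_VALUES));    <- stored, i <= j
     PetscCall(MatSetValues(mat, 1, &j, 1, &i, &v, ADD_VALUES));    <- ignored, row > col
*/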
402 
403 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
404 {
405   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
406   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
407   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
408   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
409   PetscBool          roworiented = a->roworiented;
410   const PetscScalar *value       = v;
411   MatScalar         *ap, *aa = a->a, *bap;
412 
413   PetscFunctionBegin;
414   if (col < row) {
415     if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
416     else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
417   }
418   rp    = aj + ai[row];
419   ap    = aa + bs2 * ai[row];
420   rmax  = imax[row];
421   nrow  = ailen[row];
422   value = v;
423   low   = 0;
424   high  = nrow;
425 
426   while (high - low > 7) {
427     t = (low + high) / 2;
428     if (rp[t] > col) high = t;
429     else low = t;
430   }
431   for (i = low; i < high; i++) {
432     if (rp[i] > col) break;
433     if (rp[i] == col) {
434       bap = ap + bs2 * i;
435       if (roworiented) {
436         if (is == ADD_VALUES) {
437           for (ii = 0; ii < bs; ii++) {
438             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
439           }
440         } else {
441           for (ii = 0; ii < bs; ii++) {
442             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
443           }
444         }
445       } else {
446         if (is == ADD_VALUES) {
447           for (ii = 0; ii < bs; ii++) {
448             for (jj = 0; jj < bs; jj++) *bap++ += *value++;
449           }
450         } else {
451           for (ii = 0; ii < bs; ii++) {
452             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
453           }
454         }
455       }
456       goto noinsert2;
457     }
458   }
459   if (nonew == 1) goto noinsert2;
460   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
461   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
462   N = nrow++ - 1;
463   high++;
464   /* shift up all the later entries in this row */
465   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
466   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
467   rp[i] = col;
468   bap   = ap + bs2 * i;
469   if (roworiented) {
470     for (ii = 0; ii < bs; ii++) {
471       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
472     }
473   } else {
474     for (ii = 0; ii < bs; ii++) {
475       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
476     }
477   }
478 noinsert2:;
479   ailen[row] = nrow;
480   PetscFunctionReturn(PETSC_SUCCESS);
481 }
482 
483 /*
484    This routine is exactly duplicated in mpibaij.c
485 */
486 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
487 {
488   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
489   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
490   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
491   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
492   PetscBool          roworiented = a->roworiented;
493   const PetscScalar *value       = v;
494   MatScalar         *ap, *aa = a->a, *bap;
495 
496   PetscFunctionBegin;
497   rp    = aj + ai[row];
498   ap    = aa + bs2 * ai[row];
499   rmax  = imax[row];
500   nrow  = ailen[row];
501   low   = 0;
502   high  = nrow;
503   value = v;
504   while (high - low > 7) {
505     t = (low + high) / 2;
506     if (rp[t] > col) high = t;
507     else low = t;
508   }
509   for (i = low; i < high; i++) {
510     if (rp[i] > col) break;
511     if (rp[i] == col) {
512       bap = ap + bs2 * i;
513       if (roworiented) {
514         if (is == ADD_VALUES) {
515           for (ii = 0; ii < bs; ii++) {
516             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
517           }
518         } else {
519           for (ii = 0; ii < bs; ii++) {
520             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
521           }
522         }
523       } else {
524         if (is == ADD_VALUES) {
525           for (ii = 0; ii < bs; ii++, value += bs) {
526             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
527             bap += bs;
528           }
529         } else {
530           for (ii = 0; ii < bs; ii++, value += bs) {
531             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
532             bap += bs;
533           }
534         }
535       }
536       goto noinsert2;
537     }
538   }
539   if (nonew == 1) goto noinsert2;
540   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
541   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
542   N = nrow++ - 1;
543   high++;
544   /* shift up all the later entries in this row */
545   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
546   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
547   rp[i] = col;
548   bap   = ap + bs2 * i;
549   if (roworiented) {
550     for (ii = 0; ii < bs; ii++) {
551       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
552     }
553   } else {
554     for (ii = 0; ii < bs; ii++) {
555       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
556     }
557   }
558 noinsert2:;
559   ailen[row] = nrow;
560   PetscFunctionReturn(PETSC_SUCCESS);
561 }
562 
563 /*
564     This routine could be optimized by removing the need for the block copy below and passing stride information
565   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
566 */
567 static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
568 {
569   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
570   const MatScalar *value;
571   MatScalar       *barray      = baij->barray;
572   PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
573   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
574   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
575   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
576 
577   PetscFunctionBegin;
578   if (!barray) {
579     PetscCall(PetscMalloc1(bs2, &barray));
580     baij->barray = barray;
581   }
582 
583   if (roworiented) {
584     stepval = (n - 1) * bs;
585   } else {
586     stepval = (m - 1) * bs;
587   }
588   for (i = 0; i < m; i++) {
589     if (im[i] < 0) continue;
590     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
591     if (im[i] >= rstart && im[i] < rend) {
592       row = im[i] - rstart;
593       for (j = 0; j < n; j++) {
594         if (im[i] > in[j]) {
595           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
596           else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
597         }
598         /* If n == 1 (row-oriented) or m == 1 (column-oriented), the block in v is already contiguous and no copy is required */
599         if ((roworiented) && (n == 1)) {
600           barray = (MatScalar *)v + i * bs2;
601         } else if ((!roworiented) && (m == 1)) {
602           barray = (MatScalar *)v + j * bs2;
603         } else { /* Here a copy is required */
604           if (roworiented) {
605             value = v + i * (stepval + bs) * bs + j * bs;
606           } else {
607             value = v + j * (stepval + bs) * bs + i * bs;
608           }
609           for (ii = 0; ii < bs; ii++, value += stepval) {
610             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
611           }
612           barray -= bs2;
613         }
614 
615         if (in[j] >= cstart && in[j] < cend) {
616           col = in[j] - cstart;
617           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
618         } else if (in[j] < 0) {
619           continue;
620         } else {
621           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
622           if (mat->was_assembled) {
623             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
624 
625 #if defined(PETSC_USE_DEBUG)
626   #if defined(PETSC_USE_CTABLE)
627             {
628               PetscInt data;
629               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
630               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
631             }
632   #else
633             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
634   #endif
635 #endif
636 #if defined(PETSC_USE_CTABLE)
637             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
638             col = (col - 1) / bs;
639 #else
640             col = (baij->colmap[in[j]] - 1) / bs;
641 #endif
642             if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
643               PetscCall(MatDisAssemble_MPISBAIJ(mat));
644               col = in[j];
645             }
646           } else col = in[j];
647           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
648         }
649       }
650     } else {
651       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
652       if (!baij->donotstash) {
653         if (roworiented) {
654           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
655         } else {
656           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
657         }
658       }
659     }
660   }
661   PetscFunctionReturn(PETSC_SUCCESS);
662 }
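
/* Caller-side sketch (illustrative block indices, bs = 2): one bs x bs block
   inserted at block position (1,2); v is traversed row-major here because the
   matrix is row-oriented by default.

     PetscInt    ib = 1, jb = 2;
     PetscScalar v[4] = {1.0, 2.0,
                         3.0, 4.0};
     PetscCall(MatSetValuesBlocked(mat, 1, &ib, 1, &jb, v, INSERT_VALUES));
*/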
663 
664 static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
665 {
666   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
667   PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
668   PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
669 
670   PetscFunctionBegin;
671   for (i = 0; i < m; i++) {
672     if (idxm[i] < 0) continue; /* negative row */
673     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
674     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
675       row = idxm[i] - bsrstart;
676       for (j = 0; j < n; j++) {
677         if (idxn[j] < 0) continue; /* negative column */
678         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
679         if (idxn[j] >= bscstart && idxn[j] < bscend) {
680           col = idxn[j] - bscstart;
681           PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
682         } else {
683           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
684 #if defined(PETSC_USE_CTABLE)
685           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
686           data--;
687 #else
688           data = baij->colmap[idxn[j] / bs] - 1;
689 #endif
690           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
691           else {
692             col = data + idxn[j] % bs;
693             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
694           }
695         }
696       }
697     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
698   }
699   PetscFunctionReturn(PETSC_SUCCESS);
700 }
701 
702 static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
703 {
704   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
705   PetscReal     sum[2], *lnorm2;
706 
707   PetscFunctionBegin;
708   if (baij->size == 1) {
709     PetscCall(MatNorm(baij->A, type, norm));
710   } else {
711     if (type == NORM_FROBENIUS) {
712       PetscCall(PetscMalloc1(2, &lnorm2));
713       PetscCall(MatNorm(baij->A, type, lnorm2));
714       *lnorm2 = (*lnorm2) * (*lnorm2);
715       lnorm2++; /* advance to the second entry; the first holds the square of norm(A) */
716       PetscCall(MatNorm(baij->B, type, lnorm2));
717       *lnorm2 = (*lnorm2) * (*lnorm2);
718       lnorm2--; /* back to the first entry; the second holds the square of norm(B) */
719       PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
720       *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
721       PetscCall(PetscFree(lnorm2));
722     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
723       Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
724       Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
725       PetscReal    *rsum, *rsum2, vabs;
726       PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
727       PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
728       MatScalar    *v;
729 
730       PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
731       PetscCall(PetscArrayzero(rsum, mat->cmap->N));
732       /* Amat */
733       v  = amat->a;
734       jj = amat->j;
735       for (brow = 0; brow < mbs; brow++) {
736         grow = bs * (rstart + brow);
737         nz   = amat->i[brow + 1] - amat->i[brow];
738         for (bcol = 0; bcol < nz; bcol++) {
739           gcol = bs * (rstart + *jj);
740           jj++;
741           for (col = 0; col < bs; col++) {
742             for (row = 0; row < bs; row++) {
743               vabs = PetscAbsScalar(*v);
744               v++;
745               rsum[gcol + col] += vabs;
746               /* non-diagonal block */
747               if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
748             }
749           }
750         }
751         PetscCall(PetscLogFlops(nz * bs * bs));
752       }
753       /* Bmat */
754       v  = bmat->a;
755       jj = bmat->j;
756       for (brow = 0; brow < mbs; brow++) {
757         grow = bs * (rstart + brow);
758         nz   = bmat->i[brow + 1] - bmat->i[brow];
759         for (bcol = 0; bcol < nz; bcol++) {
760           gcol = bs * garray[*jj];
761           jj++;
762           for (col = 0; col < bs; col++) {
763             for (row = 0; row < bs; row++) {
764               vabs = PetscAbsScalar(*v);
765               v++;
766               rsum[gcol + col] += vabs;
767               rsum[grow + row] += vabs;
768             }
769           }
770         }
771         PetscCall(PetscLogFlops(nz * bs * bs));
772       }
773       PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
774       *norm = 0.0;
775       for (col = 0; col < mat->cmap->N; col++) {
776         if (rsum2[col] > *norm) *norm = rsum2[col];
777       }
778       PetscCall(PetscFree2(rsum, rsum2));
779     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
780   }
781   PetscFunctionReturn(PETSC_SUCCESS);
782 }
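
/* The Frobenius branch above exploits the symmetric storage: each rank holds
   its diagonal block in A and the off-diagonal block in B, and every stored
   entry of B also stands for its transpose, hence (notation assumed)

     norm(mat)_F = sqrt( sum_r norm(A_r)_F^2 + 2 * sum_r norm(B_r)_F^2 )

   which is exactly PetscSqrtReal(sum[0] + 2*sum[1]) after the reduction. */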
783 
784 static PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
785 {
786   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
787   PetscInt      nstash, reallocs;
788 
789   PetscFunctionBegin;
790   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
791 
792   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
793   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
794   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
795   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
796   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
797   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
798   PetscFunctionReturn(PETSC_SUCCESS);
799 }
800 
801 static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
802 {
803   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
804   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
805   PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
806   PetscInt     *row, *col;
807   PetscBool     other_disassembled;
808   PetscMPIInt   n;
809   PetscBool     r1, r2, r3;
810   MatScalar    *val;
811 
812   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
813   PetscFunctionBegin;
814   if (!baij->donotstash && !mat->nooffprocentries) {
815     while (1) {
816       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
817       if (!flg) break;
818 
819       for (i = 0; i < n;) {
820         /* Now identify the consecutive vals belonging to the same row */
821         for (j = i, rstart = row[j]; j < n; j++) {
822           if (row[j] != rstart) break;
823         }
824         if (j < n) ncols = j - i;
825         else ncols = n - i;
826         /* Now assemble all these values with a single function call */
827         PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
828         i = j;
829       }
830     }
831     PetscCall(MatStashScatterEnd_Private(&mat->stash));
832     /* Now process the block-stash. Since the values are stashed column-oriented,
833        temporarily clear the row-oriented flags, and restore the original flags
834        after the MatSetValuesBlocked() calls */
835     r1 = baij->roworiented;
836     r2 = a->roworiented;
837     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
838 
839     baij->roworiented = PETSC_FALSE;
840     a->roworiented    = PETSC_FALSE;
841 
842     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
843     while (1) {
844       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
845       if (!flg) break;
846 
847       for (i = 0; i < n;) {
848         /* Now identify the consecutive vals belonging to the same row */
849         for (j = i, rstart = row[j]; j < n; j++) {
850           if (row[j] != rstart) break;
851         }
852         if (j < n) ncols = j - i;
853         else ncols = n - i;
854         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
855         i = j;
856       }
857     }
858     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
859 
860     baij->roworiented = r1;
861     a->roworiented    = r2;
862 
863     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
864   }
865 
866   PetscCall(MatAssemblyBegin(baij->A, mode));
867   PetscCall(MatAssemblyEnd(baij->A, mode));
868 
869   /* determine if any processor has disassembled; if so, we must
870      also disassemble ourselves so that we may reassemble. */
871   /*
872      if the nonzero structure of submatrix B cannot change, then we know that
873      no processor disassembled and we can skip this step
874   */
875   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
876     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
877     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
878   }
879 
880   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
881   PetscCall(MatAssemblyBegin(baij->B, mode));
882   PetscCall(MatAssemblyEnd(baij->B, mode));
883 
884   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
885 
886   baij->rowvalues = NULL;
887 
888   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
889   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
890     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
891     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
892   }
893   PetscFunctionReturn(PETSC_SUCCESS);
894 }
895 
896 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
897 #include <petscdraw.h>
898 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
899 {
900   Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
901   PetscInt          bs   = mat->rmap->bs;
902   PetscMPIInt       rank = baij->rank;
903   PetscBool         iascii, isdraw;
904   PetscViewer       sviewer;
905   PetscViewerFormat format;
906 
907   PetscFunctionBegin;
908   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
909   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
910   if (iascii) {
911     PetscCall(PetscViewerGetFormat(viewer, &format));
912     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
913       MatInfo info;
914       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
915       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
916       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
917       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
918                                                    mat->rmap->bs, (double)info.memory));
919       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
920       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
921       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
922       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
923       PetscCall(PetscViewerFlush(viewer));
924       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
925       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
926       PetscCall(VecScatterView(baij->Mvctx, viewer));
927       PetscFunctionReturn(PETSC_SUCCESS);
928     } else if (format == PETSC_VIEWER_ASCII_INFO) {
929       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
930       PetscFunctionReturn(PETSC_SUCCESS);
931     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
932       PetscFunctionReturn(PETSC_SUCCESS);
933     }
934   }
935 
936   if (isdraw) {
937     PetscDraw draw;
938     PetscBool isnull;
939     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
940     PetscCall(PetscDrawIsNull(draw, &isnull));
941     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
942   }
943 
944   {
945     /* assemble the entire matrix onto first processor. */
946     Mat           A;
947     Mat_SeqSBAIJ *Aloc;
948     Mat_SeqBAIJ  *Bloc;
949     PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
950     MatScalar    *a;
951     const char   *matname;
952 
953     /* Should this be the same type as mat? */
954     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
955     if (rank == 0) {
956       PetscCall(MatSetSizes(A, M, N, M, N));
957     } else {
958       PetscCall(MatSetSizes(A, 0, 0, M, N));
959     }
960     PetscCall(MatSetType(A, MATMPISBAIJ));
961     PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
962     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
963 
964     /* copy over the A part */
965     Aloc = (Mat_SeqSBAIJ *)baij->A->data;
966     ai   = Aloc->i;
967     aj   = Aloc->j;
968     a    = Aloc->a;
969     PetscCall(PetscMalloc1(bs, &rvals));
970 
971     for (i = 0; i < mbs; i++) {
972       rvals[0] = bs * (baij->rstartbs + i);
973       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
974       for (j = ai[i]; j < ai[i + 1]; j++) {
975         col = (baij->cstartbs + aj[j]) * bs;
976         for (k = 0; k < bs; k++) {
977           PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
978           col++;
979           a += bs;
980         }
981       }
982     }
983     /* copy over the B part */
984     Bloc = (Mat_SeqBAIJ *)baij->B->data;
985     ai   = Bloc->i;
986     aj   = Bloc->j;
987     a    = Bloc->a;
988     for (i = 0; i < mbs; i++) {
989       rvals[0] = bs * (baij->rstartbs + i);
990       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
991       for (j = ai[i]; j < ai[i + 1]; j++) {
992         col = baij->garray[aj[j]] * bs;
993         for (k = 0; k < bs; k++) {
994           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
995           col++;
996           a += bs;
997         }
998       }
999     }
1000     PetscCall(PetscFree(rvals));
1001     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1002     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1003     /*
1004        Every process has to make the viewing call to draw the matrix since the graphics waits are
1005        synchronized across all processors that share the PetscDraw object
1006     */
1007     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1008     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1009     if (rank == 0) {
1010       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)A->data)->A, matname));
1011       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)A->data)->A, sviewer));
1012     }
1013     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1014     PetscCall(MatDestroy(&A));
1015   }
1016   PetscFunctionReturn(PETSC_SUCCESS);
1017 }
1018 
1019 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1020 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
1021 
1022 static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
1023 {
1024   PetscBool iascii, isdraw, issocket, isbinary;
1025 
1026   PetscFunctionBegin;
1027   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1028   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1029   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1030   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1031   if (iascii || isdraw || issocket) {
1032     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
1033   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
1034   PetscFunctionReturn(PETSC_SUCCESS);
1035 }
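
/* Viewer sketch (assumed driver code and options): the ASCII, draw, and
   binary paths above are typically reached via

     PetscCall(MatView(mat, PETSC_VIEWER_STDOUT_WORLD));

   or from the command line with -mat_view ::ascii_info_detail (per-rank
   nonzero counts plus the VecScatter info printed above) or -mat_view draw
   (nonzero pattern, gathered onto rank 0 as in the code above). */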
1036 
1037 #if defined(PETSC_USE_COMPLEX)
1038 static PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
1039 {
1040   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1041   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1042   PetscScalar       *from;
1043   const PetscScalar *x;
1044 
1045   PetscFunctionBegin;
1046   /* diagonal part */
1047   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1048   /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
1049   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1050   PetscCall(VecZeroEntries(a->slvec1b));
1051 
1052   /* subdiagonal part */
1053   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1054   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1055 
1056   /* copy x into the vec slvec0 */
1057   PetscCall(VecGetArray(a->slvec0, &from));
1058   PetscCall(VecGetArrayRead(xx, &x));
1059 
1060   PetscCall(PetscArraycpy(from, x, bs * mbs));
1061   PetscCall(VecRestoreArray(a->slvec0, &from));
1062   PetscCall(VecRestoreArrayRead(xx, &x));
1063 
1064   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1065   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1066   /* superdiagonal part */
1067   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1068   PetscFunctionReturn(PETSC_SUCCESS);
1069 }
1070 #endif
1071 
1072 static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1073 {
1074   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1075   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1076   PetscScalar       *from;
1077   const PetscScalar *x;
1078 
1079   PetscFunctionBegin;
1080   /* diagonal part */
1081   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1082   /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
1083   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1084   PetscCall(VecZeroEntries(a->slvec1b));
1085 
1086   /* subdiagonal part */
1087   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1088 
1089   /* copy x into the vec slvec0 */
1090   PetscCall(VecGetArray(a->slvec0, &from));
1091   PetscCall(VecGetArrayRead(xx, &x));
1092 
1093   PetscCall(PetscArraycpy(from, x, bs * mbs));
1094   PetscCall(VecRestoreArray(a->slvec0, &from));
1095   PetscCall(VecRestoreArrayRead(xx, &x));
1096 
1097   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1098   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1099   /* superdiagonal part */
1100   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1101   PetscFunctionReturn(PETSC_SUCCESS);
1102 }
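
/* Shape of the product formed above (notation assumed): with x_l the local
   part of x and x_g the ghost entries gathered through sMvctx,

     y_l = A_d * x_l  +  B * x_g  +  (remote contributions of B^T * x_l)

   slvec0 = [x_l; B^T x_l] is scattered with ADD_VALUES into
   slvec1 = [slvec1a; slvec1b], so slvec1a accumulates A_d x_l plus the
   incoming B^T pieces and slvec1b receives x_g; the final multadd then adds
   B * slvec1b to slvec1a, producing yy. */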
1103 
1104 #if PetscDefined(USE_COMPLEX)
1105 static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
1106 {
1107   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1108   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1109   PetscScalar       *from;
1110   const PetscScalar *x;
1111 
1112   PetscFunctionBegin;
1113   /* diagonal part */
1114   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1115   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1116   PetscCall(VecZeroEntries(a->slvec1b));
1117 
1118   /* subdiagonal part */
1119   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1120   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1121 
1122   /* copy x into the vec slvec0 */
1123   PetscCall(VecGetArray(a->slvec0, &from));
1124   PetscCall(VecGetArrayRead(xx, &x));
1125   PetscCall(PetscArraycpy(from, x, bs * mbs));
1126   PetscCall(VecRestoreArray(a->slvec0, &from));
1127 
1128   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1129   PetscCall(VecRestoreArrayRead(xx, &x));
1130   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1131 
1132   /* superdiagonal part */
1133   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1134   PetscFunctionReturn(PETSC_SUCCESS);
1135 }
1136 #endif
1137 
1138 static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1139 {
1140   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1141   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1142   PetscScalar       *from;
1143   const PetscScalar *x;
1144 
1145   PetscFunctionBegin;
1146   /* diagonal part */
1147   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1148   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1149   PetscCall(VecZeroEntries(a->slvec1b));
1150 
1151   /* subdiagonal part */
1152   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1153 
1154   /* copy x into the vec slvec0 */
1155   PetscCall(VecGetArray(a->slvec0, &from));
1156   PetscCall(VecGetArrayRead(xx, &x));
1157   PetscCall(PetscArraycpy(from, x, bs * mbs));
1158   PetscCall(VecRestoreArray(a->slvec0, &from));
1159 
1160   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1161   PetscCall(VecRestoreArrayRead(xx, &x));
1162   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1163 
1164   /* superdiagonal part */
1165   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1166   PetscFunctionReturn(PETSC_SUCCESS);
1167 }
1168 
1169 /*
1170   This only works correctly for square matrices where the subblock A->A is the
1171    diagonal block
1172 */
1173 static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1174 {
1175   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1176 
1177   PetscFunctionBegin;
1178   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1179   PetscCall(MatGetDiagonal(a->A, v));
1180   PetscFunctionReturn(PETSC_SUCCESS);
1181 }
1182 
1183 static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1184 {
1185   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1186 
1187   PetscFunctionBegin;
1188   PetscCall(MatScale(a->A, aa));
1189   PetscCall(MatScale(a->B, aa));
1190   PetscFunctionReturn(PETSC_SUCCESS);
1191 }
1192 
1193 static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1194 {
1195   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
1196   PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
1197   PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1198   PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1199   PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;
1200 
1201   PetscFunctionBegin;
1202   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1203   mat->getrowactive = PETSC_TRUE;
1204 
1205   if (!mat->rowvalues && (idx || v)) {
1206     /*
1207         allocate enough space to hold information from the longest row.
1208     */
1209     Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
1210     Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
1211     PetscInt      max = 1, mbs = mat->mbs, tmp;
1212     for (i = 0; i < mbs; i++) {
1213       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
1214       if (max < tmp) max = tmp;
1215     }
1216     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1217   }
1218 
1219   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1220   lrow = row - brstart; /* local row index */
1221 
1222   pvA = &vworkA;
1223   pcA = &cworkA;
1224   pvB = &vworkB;
1225   pcB = &cworkB;
1226   if (!v) {
1227     pvA = NULL;
1228     pvB = NULL;
1229   }
1230   if (!idx) {
1231     pcA = NULL;
1232     if (!v) pcB = NULL;
1233   }
1234   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1235   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1236   nztot = nzA + nzB;
1237 
1238   cmap = mat->garray;
1239   if (v || idx) {
1240     if (nztot) {
1241       /* Sort by increasing column numbers, assuming A and B already sorted */
1242       PetscInt imark = -1;
1243       if (v) {
1244         *v = v_p = mat->rowvalues;
1245         for (i = 0; i < nzB; i++) {
1246           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1247           else break;
1248         }
1249         imark = i;
1250         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1251         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1252       }
1253       if (idx) {
1254         *idx = idx_p = mat->rowindices;
1255         if (imark > -1) {
1256           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1257         } else {
1258           for (i = 0; i < nzB; i++) {
1259             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1260             else break;
1261           }
1262           imark = i;
1263         }
1264         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1265         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1266       }
1267     } else {
1268       if (idx) *idx = NULL;
1269       if (v) *v = NULL;
1270     }
1271   }
1272   *nz = nztot;
1273   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1274   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1275   PetscFunctionReturn(PETSC_SUCCESS);
1276 }
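
/* Caller-side sketch: because only the upper triangle is stored, a sweep over
   rows should be bracketed by the Get/RestoreRowUpperTriangular pair, and only
   locally owned rows may be queried (per the PetscCheck above).

     PetscInt           rstart, rend, r, ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscCall(MatGetOwnershipRange(mat, &rstart, &rend));
     PetscCall(MatGetRowUpperTriangular(mat));
     for (r = rstart; r < rend; r++) {
       PetscCall(MatGetRow(mat, r, &ncols, &cols, &vals));
       ... consume cols[] and vals[] ...
       PetscCall(MatRestoreRow(mat, r, &ncols, &cols, &vals));
     }
     PetscCall(MatRestoreRowUpperTriangular(mat));
*/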
1277 
1278 static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1279 {
1280   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1281 
1282   PetscFunctionBegin;
1283   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1284   baij->getrowactive = PETSC_FALSE;
1285   PetscFunctionReturn(PETSC_SUCCESS);
1286 }
1287 
1288 static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1289 {
1290   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1291   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1292 
1293   PetscFunctionBegin;
1294   aA->getrow_utriangular = PETSC_TRUE;
1295   PetscFunctionReturn(PETSC_SUCCESS);
1296 }
1297 static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1298 {
1299   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1300   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1301 
1302   PetscFunctionBegin;
1303   aA->getrow_utriangular = PETSC_FALSE;
1304   PetscFunctionReturn(PETSC_SUCCESS);
1305 }
1306 
1307 static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1308 {
1309   PetscFunctionBegin;
1310   if (PetscDefined(USE_COMPLEX)) {
1311     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
1312 
1313     PetscCall(MatConjugate(a->A));
1314     PetscCall(MatConjugate(a->B));
1315   }
1316   PetscFunctionReturn(PETSC_SUCCESS);
1317 }
1318 
1319 static PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1320 {
1321   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1322 
1323   PetscFunctionBegin;
1324   PetscCall(MatRealPart(a->A));
1325   PetscCall(MatRealPart(a->B));
1326   PetscFunctionReturn(PETSC_SUCCESS);
1327 }
1328 
1329 static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1330 {
1331   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1332 
1333   PetscFunctionBegin;
1334   PetscCall(MatImaginaryPart(a->A));
1335   PetscCall(MatImaginaryPart(a->B));
1336   PetscFunctionReturn(PETSC_SUCCESS);
1337 }
1338 
1339 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1340    Input: isrow       - distributed (parallel),
1341           iscol_local - locally owned (seq)
1342 */
1343 static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1344 {
1345   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
1346   const PetscInt *ptr1, *ptr2;
1347 
1348   PetscFunctionBegin;
1349   *flg = PETSC_FALSE;
1350   PetscCall(ISGetLocalSize(isrow, &sz1));
1351   PetscCall(ISGetLocalSize(iscol_local, &sz2));
1352   if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS);
1353 
1354   PetscCall(ISGetIndices(isrow, &ptr1));
1355   PetscCall(ISGetIndices(iscol_local, &ptr2));
1356 
1357   PetscCall(PetscMalloc1(sz1, &a1));
1358   PetscCall(PetscMalloc1(sz2, &a2));
1359   PetscCall(PetscArraycpy(a1, ptr1, sz1));
1360   PetscCall(PetscArraycpy(a2, ptr2, sz2));
1361   PetscCall(PetscSortInt(sz1, a1));
1362   PetscCall(PetscSortInt(sz2, a2));
1363 
1364   nmatch = 0;
1365   k      = 0;
1366   for (i = 0; i < sz1; i++) {
1367     for (j = k; j < sz2; j++) {
1368       if (a1[i] == a2[j]) {
1369         k = j;
1370         nmatch++;
1371         break;
1372       }
1373     }
1374   }
1375   PetscCall(ISRestoreIndices(isrow, &ptr1));
1376   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
1377   PetscCall(PetscFree(a1));
1378   PetscCall(PetscFree(a2));
1379   if (nmatch < sz1) {
1380     *flg = PETSC_FALSE;
1381   } else {
1382     *flg = PETSC_TRUE;
1383   }
1384   PetscFunctionReturn(PETSC_SUCCESS);
1385 }
1386 
1387 static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1388 {
1389   Mat       C[2];
1390   IS        iscol_local, isrow_local;
1391   PetscInt  csize, csize_local, rsize;
1392   PetscBool isequal, issorted, isidentity = PETSC_FALSE;
1393 
1394   PetscFunctionBegin;
1395   PetscCall(ISGetLocalSize(iscol, &csize));
1396   PetscCall(ISGetLocalSize(isrow, &rsize));
1397   if (call == MAT_REUSE_MATRIX) {
1398     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1399     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1400   } else {
1401     PetscCall(ISAllGather(iscol, &iscol_local));
1402     PetscCall(ISSorted(iscol_local, &issorted));
1403     PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must be sorted");
1404   }
1405   PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
1406   if (!isequal) {
1407     PetscCall(ISGetLocalSize(iscol_local, &csize_local));
1408     isidentity = (PetscBool)(mat->cmap->N == csize_local);
1409     if (!isidentity) {
1410       if (call == MAT_REUSE_MATRIX) {
1411         PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather_other", (PetscObject *)&isrow_local));
1412         PetscCheck(isrow_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1413       } else {
1414         PetscCall(ISAllGather(isrow, &isrow_local));
1415         PetscCall(ISSorted(isrow_local, &issorted));
1416         PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, isrow must be sorted");
1417       }
1418     }
1419   }
1420   /* now call MatCreateSubMatrix_MPIBAIJ() */
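  /* when isrow and iscol differ, only one triangle of the matrix is stored, so the requested block is
     assembled from two rectangular extractions: C[0] = block (isrow, iscol), to which the transpose of
     C[1] = block (iscol, isrow) is added below */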
1421   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, isequal || isidentity ? call : MAT_INITIAL_MATRIX, isequal || isidentity ? newmat : C, (PetscBool)(isequal || isidentity)));
1422   if (!isequal && !isidentity) {
1423     if (call == MAT_INITIAL_MATRIX) {
1424       IS       intersect;
1425       PetscInt ni;
1426 
1427       PetscCall(ISIntersect(isrow_local, iscol_local, &intersect));
1428       PetscCall(ISGetLocalSize(intersect, &ni));
1429       PetscCall(ISDestroy(&intersect));
1430       PetscCheck(ni == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create such a submatrix: for symmetric format, when requesting an off-diagonal submatrix, isrow and iscol should have an empty intersection (number of common indices is %" PetscInt_FMT ")", ni);
1431     }
1432     PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, iscol, isrow_local, rsize, MAT_INITIAL_MATRIX, C + 1, PETSC_FALSE));
1433     PetscCall(MatTranspose(C[1], MAT_INPLACE_MATRIX, C + 1));
1434     PetscCall(MatAXPY(C[0], 1.0, C[1], DIFFERENT_NONZERO_PATTERN));
1435     if (call == MAT_REUSE_MATRIX) PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
1436     else if (mat->rmap->bs == 1) PetscCall(MatConvert(C[0], MATAIJ, MAT_INITIAL_MATRIX, newmat));
1437     else PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
1438     PetscCall(MatDestroy(C));
1439     PetscCall(MatDestroy(C + 1));
1440   }
1441   if (call == MAT_INITIAL_MATRIX) {
1442     if (!isequal && !isidentity) {
1443       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather_other", (PetscObject)isrow_local));
1444       PetscCall(ISDestroy(&isrow_local));
1445     }
1446     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1447     PetscCall(ISDestroy(&iscol_local));
1448   }
1449   PetscFunctionReturn(PETSC_SUCCESS);
1450 }
1451 
1452 static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1453 {
1454   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1455 
1456   PetscFunctionBegin;
1457   PetscCall(MatZeroEntries(l->A));
1458   PetscCall(MatZeroEntries(l->B));
1459   PetscFunctionReturn(PETSC_SUCCESS);
1460 }
1461 
1462 static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1463 {
1464   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1465   Mat            A = a->A, B = a->B;
1466   PetscLogDouble isend[5], irecv[5];
1467 
1468   PetscFunctionBegin;
1469   info->block_size = (PetscReal)matin->rmap->bs;
1470 
1471   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1472 
1473   isend[0] = info->nz_used;
1474   isend[1] = info->nz_allocated;
1475   isend[2] = info->nz_unneeded;
1476   isend[3] = info->memory;
1477   isend[4] = info->mallocs;
1478 
1479   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1480 
1481   isend[0] += info->nz_used;
1482   isend[1] += info->nz_allocated;
1483   isend[2] += info->nz_unneeded;
1484   isend[3] += info->memory;
1485   isend[4] += info->mallocs;
1486   if (flag == MAT_LOCAL) {
1487     info->nz_used      = isend[0];
1488     info->nz_allocated = isend[1];
1489     info->nz_unneeded  = isend[2];
1490     info->memory       = isend[3];
1491     info->mallocs      = isend[4];
1492   } else if (flag == MAT_GLOBAL_MAX) {
1493     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1494 
1495     info->nz_used      = irecv[0];
1496     info->nz_allocated = irecv[1];
1497     info->nz_unneeded  = irecv[2];
1498     info->memory       = irecv[3];
1499     info->mallocs      = irecv[4];
1500   } else if (flag == MAT_GLOBAL_SUM) {
1501     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1502 
1503     info->nz_used      = irecv[0];
1504     info->nz_allocated = irecv[1];
1505     info->nz_unneeded  = irecv[2];
1506     info->memory       = irecv[3];
1507     info->mallocs      = irecv[4];
1508   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1509   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1510   info->fill_ratio_needed = 0;
1511   info->factor_mallocs    = 0;
1512   PetscFunctionReturn(PETSC_SUCCESS);
1513 }
1514 
1515 static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1516 {
1517   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1518   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1519 
1520   PetscFunctionBegin;
1521   switch (op) {
1522   case MAT_NEW_NONZERO_LOCATIONS:
1523   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1524   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1525   case MAT_KEEP_NONZERO_PATTERN:
1526   case MAT_SUBMAT_SINGLEIS:
1527   case MAT_NEW_NONZERO_LOCATION_ERR:
1528     MatCheckPreallocated(A, 1);
1529     PetscCall(MatSetOption(a->A, op, flg));
1530     PetscCall(MatSetOption(a->B, op, flg));
1531     break;
1532   case MAT_ROW_ORIENTED:
1533     MatCheckPreallocated(A, 1);
1534     a->roworiented = flg;
1535 
1536     PetscCall(MatSetOption(a->A, op, flg));
1537     PetscCall(MatSetOption(a->B, op, flg));
1538     break;
1539   case MAT_FORCE_DIAGONAL_ENTRIES:
1540   case MAT_SORTED_FULL:
1541     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1542     break;
1543   case MAT_IGNORE_OFF_PROC_ENTRIES:
1544     a->donotstash = flg;
1545     break;
1546   case MAT_USE_HASH_TABLE:
1547     a->ht_flag = flg;
1548     break;
1549   case MAT_HERMITIAN:
1550     MatCheckPreallocated(A, 1);
1551     PetscCall(MatSetOption(a->A, op, flg));
1552 #if defined(PETSC_USE_COMPLEX)
1553     if (flg) { /* need different mat-vec ops */
1554       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1555       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1556       A->ops->multtranspose    = NULL;
1557       A->ops->multtransposeadd = NULL;
1558       A->symmetric             = PETSC_BOOL3_FALSE;
1559     }
1560 #endif
1561     break;
1562   case MAT_SPD:
1563   case MAT_SYMMETRIC:
1564     MatCheckPreallocated(A, 1);
1565     PetscCall(MatSetOption(a->A, op, flg));
1566 #if defined(PETSC_USE_COMPLEX)
1567     if (flg) { /* restore to use default mat-vec ops */
1568       A->ops->mult             = MatMult_MPISBAIJ;
1569       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1570       A->ops->multtranspose    = MatMult_MPISBAIJ;
1571       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1572     }
1573 #endif
1574     break;
1575   case MAT_STRUCTURALLY_SYMMETRIC:
1576     MatCheckPreallocated(A, 1);
1577     PetscCall(MatSetOption(a->A, op, flg));
1578     break;
1579   case MAT_SYMMETRY_ETERNAL:
1580   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1581     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
1582     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1583     break;
1584   case MAT_SPD_ETERNAL:
1585     break;
1586   case MAT_IGNORE_LOWER_TRIANGULAR:
1587     aA->ignore_ltriangular = flg;
1588     break;
1589   case MAT_ERROR_LOWER_TRIANGULAR:
1590     aA->ignore_ltriangular = flg;
1591     break;
1592   case MAT_GETROW_UPPERTRIANGULAR:
1593     aA->getrow_utriangular = flg;
1594     break;
1595   default:
1596     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unknown option %d", (int)op);
1597   }
1598   PetscFunctionReturn(PETSC_SUCCESS);
1599 }
1600 
1601 static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1602 {
1603   PetscFunctionBegin;
1604   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1605   if (reuse == MAT_INITIAL_MATRIX) {
1606     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1607   } else if (reuse == MAT_REUSE_MATRIX) {
1608     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1609   }
1610   PetscFunctionReturn(PETSC_SUCCESS);
1611 }
1612 
1613 static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1614 {
1615   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1616   Mat           a = baij->A, b = baij->B;
1617   PetscInt      nv, m, n;
1618   PetscBool     flg;
1619 
1620   PetscFunctionBegin;
1621   if (ll != rr) {
1622     PetscCall(VecEqual(ll, rr, &flg));
1623     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be the same");
1624   }
1625   if (!ll) PetscFunctionReturn(PETSC_SUCCESS);
1626 
1627   PetscCall(MatGetLocalSize(mat, &m, &n));
1628   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local sizes %" PetscInt_FMT " and %" PetscInt_FMT " must be the same", m, n);
1629 
1630   PetscCall(VecGetLocalSize(rr, &nv));
1631   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Scaling vector local size %" PetscInt_FMT " does not match the matrix local size %" PetscInt_FMT, nv, n);
1632 
1633   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1634 
1635   /* left diagonalscale the off-diagonal part */
1636   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1637 
1638   /* scale the diagonal part */
1639   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1640 
1641   /* right diagonalscale the off-diagonal part */
1642   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1643   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1644   PetscFunctionReturn(PETSC_SUCCESS);
1645 }
1646 
1647 static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1648 {
1649   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1650 
1651   PetscFunctionBegin;
1652   PetscCall(MatSetUnfactored(a->A));
1653   PetscFunctionReturn(PETSC_SUCCESS);
1654 }
1655 
1656 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1657 
1658 static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1659 {
1660   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1661   Mat           a, b, c, d;
1662   PetscBool     flg;
1663 
1664   PetscFunctionBegin;
1665   a = matA->A;
1666   b = matA->B;
1667   c = matB->A;
1668   d = matB->B;
1669 
1670   PetscCall(MatEqual(a, c, &flg));
1671   if (flg) PetscCall(MatEqual(b, d, &flg));
1672   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1673   PetscFunctionReturn(PETSC_SUCCESS);
1674 }
1675 
1676 static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1677 {
1678   PetscBool isbaij;
1679 
1680   PetscFunctionBegin;
1681   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1682   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1683   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1684   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1685     PetscCall(MatGetRowUpperTriangular(A));
1686     PetscCall(MatCopy_Basic(A, B, str));
1687     PetscCall(MatRestoreRowUpperTriangular(A));
1688   } else {
1689     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1690     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1691 
1692     PetscCall(MatCopy(a->A, b->A, str));
1693     PetscCall(MatCopy(a->B, b->B, str));
1694   }
1695   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1696   PetscFunctionReturn(PETSC_SUCCESS);
1697 }
1698 
1699 static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1700 {
1701   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
1702   PetscBLASInt  bnz, one                          = 1;
1703   Mat_SeqSBAIJ *xa, *ya;
1704   Mat_SeqBAIJ  *xb, *yb;
1705 
1706   PetscFunctionBegin;
1707   if (str == SAME_NONZERO_PATTERN) {
1708     PetscScalar alpha = a;
1709     xa                = (Mat_SeqSBAIJ *)xx->A->data;
1710     ya                = (Mat_SeqSBAIJ *)yy->A->data;
1711     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1712     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
1713     xb = (Mat_SeqBAIJ *)xx->B->data;
1714     yb = (Mat_SeqBAIJ *)yy->B->data;
1715     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1716     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
1717     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1718   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
1719     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
1720     PetscCall(MatAXPY_Basic(Y, a, X, str));
1721     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
1722   } else {
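    /* different nonzero pattern: preallocate a fresh matrix for the union of Y's and X's patterns,
       compute the sum there, then replace Y's internal data with it via MatHeaderMerge() */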
1723     Mat       B;
1724     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1725     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
1726     PetscCall(MatGetRowUpperTriangular(X));
1727     PetscCall(MatGetRowUpperTriangular(Y));
1728     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1729     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1730     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1731     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1732     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1733     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1734     PetscCall(MatSetType(B, MATMPISBAIJ));
1735     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
1736     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1737     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1738     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1739     PetscCall(MatHeaderMerge(Y, &B));
1740     PetscCall(PetscFree(nnz_d));
1741     PetscCall(PetscFree(nnz_o));
1742     PetscCall(MatRestoreRowUpperTriangular(X));
1743     PetscCall(MatRestoreRowUpperTriangular(Y));
1744   }
1745   PetscFunctionReturn(PETSC_SUCCESS);
1746 }
1747 
1748 static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1749 {
1750   PetscInt  i;
1751   PetscBool flg;
1752 
1753   PetscFunctionBegin;
1754   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1755   for (i = 0; i < n; i++) {
1756     PetscCall(ISEqual(irow[i], icol[i], &flg));
1757     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1758   }
1759   PetscFunctionReturn(PETSC_SUCCESS);
1760 }
1761 
1762 static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1763 {
1764   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1765   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
1766 
1767   PetscFunctionBegin;
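  /* make sure the diagonal block has room for the shifted diagonal entries before MatShift_Basic() inserts them */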
1768   if (!Y->preallocated) {
1769     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
1770   } else if (!aij->nz) {
1771     PetscInt nonew = aij->nonew;
1772     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1773     aij->nonew = nonew;
1774   }
1775   PetscCall(MatShift_Basic(Y, a));
1776   PetscFunctionReturn(PETSC_SUCCESS);
1777 }
1778 
1779 static PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1780 {
1781   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1782 
1783   PetscFunctionBegin;
1784   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
1785   PetscCall(MatMissingDiagonal(a->A, missing, d));
1786   if (d) {
1787     PetscInt rstart;
1788     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1789     *d += rstart / A->rmap->bs;
1790   }
1791   PetscFunctionReturn(PETSC_SUCCESS);
1792 }
1793 
1794 static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1795 {
1796   PetscFunctionBegin;
1797   *a = ((Mat_MPISBAIJ *)A->data)->A;
1798   PetscFunctionReturn(PETSC_SUCCESS);
1799 }
1800 
1801 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep)
1802 {
1803   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1804 
1805   PetscFunctionBegin;
1806   PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep));       // possibly keep zero diagonal coefficients
1807   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
1808   PetscFunctionReturn(PETSC_SUCCESS);
1809 }
1810 
1811 static PetscErrorCode MatLoad_MPISBAIJ(Mat, PetscViewer);
1812 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat, Vec, PetscInt[]);
1813 static PetscErrorCode MatSOR_MPISBAIJ(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec);
1814 
1815 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1816                                        MatGetRow_MPISBAIJ,
1817                                        MatRestoreRow_MPISBAIJ,
1818                                        MatMult_MPISBAIJ,
1819                                        /*  4*/ MatMultAdd_MPISBAIJ,
1820                                        MatMult_MPISBAIJ, /* transpose versions are the same as the non-transpose ones since the matrix is symmetric */
1821                                        MatMultAdd_MPISBAIJ,
1822                                        NULL,
1823                                        NULL,
1824                                        NULL,
1825                                        /* 10*/ NULL,
1826                                        NULL,
1827                                        NULL,
1828                                        MatSOR_MPISBAIJ,
1829                                        MatTranspose_MPISBAIJ,
1830                                        /* 15*/ MatGetInfo_MPISBAIJ,
1831                                        MatEqual_MPISBAIJ,
1832                                        MatGetDiagonal_MPISBAIJ,
1833                                        MatDiagonalScale_MPISBAIJ,
1834                                        MatNorm_MPISBAIJ,
1835                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1836                                        MatAssemblyEnd_MPISBAIJ,
1837                                        MatSetOption_MPISBAIJ,
1838                                        MatZeroEntries_MPISBAIJ,
1839                                        /* 24*/ NULL,
1840                                        NULL,
1841                                        NULL,
1842                                        NULL,
1843                                        NULL,
1844                                        /* 29*/ MatSetUp_MPI_Hash,
1845                                        NULL,
1846                                        NULL,
1847                                        MatGetDiagonalBlock_MPISBAIJ,
1848                                        NULL,
1849                                        /* 34*/ MatDuplicate_MPISBAIJ,
1850                                        NULL,
1851                                        NULL,
1852                                        NULL,
1853                                        NULL,
1854                                        /* 39*/ MatAXPY_MPISBAIJ,
1855                                        MatCreateSubMatrices_MPISBAIJ,
1856                                        MatIncreaseOverlap_MPISBAIJ,
1857                                        MatGetValues_MPISBAIJ,
1858                                        MatCopy_MPISBAIJ,
1859                                        /* 44*/ NULL,
1860                                        MatScale_MPISBAIJ,
1861                                        MatShift_MPISBAIJ,
1862                                        NULL,
1863                                        NULL,
1864                                        /* 49*/ NULL,
1865                                        NULL,
1866                                        NULL,
1867                                        NULL,
1868                                        NULL,
1869                                        /* 54*/ NULL,
1870                                        NULL,
1871                                        MatSetUnfactored_MPISBAIJ,
1872                                        NULL,
1873                                        MatSetValuesBlocked_MPISBAIJ,
1874                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1875                                        NULL,
1876                                        NULL,
1877                                        NULL,
1878                                        NULL,
1879                                        /* 64*/ NULL,
1880                                        NULL,
1881                                        NULL,
1882                                        NULL,
1883                                        NULL,
1884                                        /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1885                                        NULL,
1886                                        MatConvert_MPISBAIJ_Basic,
1887                                        NULL,
1888                                        NULL,
1889                                        /* 74*/ NULL,
1890                                        NULL,
1891                                        NULL,
1892                                        NULL,
1893                                        NULL,
1894                                        /* 79*/ NULL,
1895                                        NULL,
1896                                        NULL,
1897                                        NULL,
1898                                        MatLoad_MPISBAIJ,
1899                                        /* 84*/ NULL,
1900                                        NULL,
1901                                        NULL,
1902                                        NULL,
1903                                        NULL,
1904                                        /* 89*/ NULL,
1905                                        NULL,
1906                                        NULL,
1907                                        NULL,
1908                                        NULL,
1909                                        /* 94*/ NULL,
1910                                        NULL,
1911                                        NULL,
1912                                        NULL,
1913                                        NULL,
1914                                        /* 99*/ NULL,
1915                                        NULL,
1916                                        NULL,
1917                                        MatConjugate_MPISBAIJ,
1918                                        NULL,
1919                                        /*104*/ NULL,
1920                                        MatRealPart_MPISBAIJ,
1921                                        MatImaginaryPart_MPISBAIJ,
1922                                        MatGetRowUpperTriangular_MPISBAIJ,
1923                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1924                                        /*109*/ NULL,
1925                                        NULL,
1926                                        NULL,
1927                                        NULL,
1928                                        MatMissingDiagonal_MPISBAIJ,
1929                                        /*114*/ NULL,
1930                                        NULL,
1931                                        NULL,
1932                                        NULL,
1933                                        NULL,
1934                                        /*119*/ NULL,
1935                                        NULL,
1936                                        NULL,
1937                                        NULL,
1938                                        NULL,
1939                                        /*124*/ NULL,
1940                                        NULL,
1941                                        NULL,
1942                                        NULL,
1943                                        NULL,
1944                                        /*129*/ NULL,
1945                                        NULL,
1946                                        NULL,
1947                                        NULL,
1948                                        NULL,
1949                                        /*134*/ NULL,
1950                                        NULL,
1951                                        NULL,
1952                                        NULL,
1953                                        NULL,
1954                                        /*139*/ MatSetBlockSizes_Default,
1955                                        NULL,
1956                                        NULL,
1957                                        NULL,
1958                                        NULL,
1959                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1960                                        NULL,
1961                                        NULL,
1962                                        NULL,
1963                                        NULL,
1964                                        NULL,
1965                                        /*150*/ NULL,
1966                                        MatEliminateZeros_MPISBAIJ,
1967                                        NULL};
1968 
1969 static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1970 {
1971   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1972   PetscInt      i, mbs, Mbs;
1973   PetscMPIInt   size;
1974 
1975   PetscFunctionBegin;
1976   if (B->hash_active) {
1977     B->ops[0]      = b->cops;
1978     B->hash_active = PETSC_FALSE;
1979   }
1980   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1981   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
1982   PetscCall(PetscLayoutSetUp(B->rmap));
1983   PetscCall(PetscLayoutSetUp(B->cmap));
1984   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1985   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1986   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1987 
1988   mbs = B->rmap->n / bs;
1989   Mbs = B->rmap->N / bs;
1990   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->n, bs);
1991 
1992   B->rmap->bs = bs;
1993   b->bs2      = bs * bs;
1994   b->mbs      = mbs;
1995   b->Mbs      = Mbs;
1996   b->nbs      = B->cmap->n / bs;
1997   b->Nbs      = B->cmap->N / bs;
1998 
1999   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2000   b->rstartbs = B->rmap->rstart / bs;
2001   b->rendbs   = B->rmap->rend / bs;
2002 
2003   b->cstartbs = B->cmap->rstart / bs;
2004   b->cendbs   = B->cmap->rend / bs;
2005 
2006 #if defined(PETSC_USE_CTABLE)
2007   PetscCall(PetscHMapIDestroy(&b->colmap));
2008 #else
2009   PetscCall(PetscFree(b->colmap));
2010 #endif
2011   PetscCall(PetscFree(b->garray));
2012   PetscCall(VecDestroy(&b->lvec));
2013   PetscCall(VecScatterDestroy(&b->Mvctx));
2014   PetscCall(VecDestroy(&b->slvec0));
2015   PetscCall(VecDestroy(&b->slvec0b));
2016   PetscCall(VecDestroy(&b->slvec1));
2017   PetscCall(VecDestroy(&b->slvec1a));
2018   PetscCall(VecDestroy(&b->slvec1b));
2019   PetscCall(VecScatterDestroy(&b->sMvctx));
2020 
2021   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2022 
2023   MatSeqXAIJGetOptions_Private(b->B);
2024   PetscCall(MatDestroy(&b->B));
2025   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2026   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2027   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2028   MatSeqXAIJRestoreOptions_Private(b->B);
2029 
2030   MatSeqXAIJGetOptions_Private(b->A);
2031   PetscCall(MatDestroy(&b->A));
2032   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2033   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2034   PetscCall(MatSetType(b->A, MATSEQSBAIJ));
2035   MatSeqXAIJRestoreOptions_Private(b->A);
2036 
2037   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2038   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2039 
2040   B->preallocated  = PETSC_TRUE;
2041   B->was_assembled = PETSC_FALSE;
2042   B->assembled     = PETSC_FALSE;
2043   PetscFunctionReturn(PETSC_SUCCESS);
2044 }
2045 
2046 static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2047 {
2048   PetscInt        m, rstart, cend;
2049   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2050   const PetscInt *JJ          = NULL;
2051   PetscScalar    *values      = NULL;
2052   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
2053   PetscBool       nooffprocentries;
2054 
2055   PetscFunctionBegin;
2056   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
2057   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2058   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2059   PetscCall(PetscLayoutSetUp(B->rmap));
2060   PetscCall(PetscLayoutSetUp(B->cmap));
2061   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2062   m      = B->rmap->n / bs;
2063   rstart = B->rmap->rstart / bs;
2064   cend   = B->cmap->rend / bs;
2065 
2066   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2067   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2068   for (i = 0; i < m; i++) {
2069     nz = ii[i + 1] - ii[i];
2070     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2071     /* count the ones on the diagonal and above, split into diagonal and off-diagonal portions. */
2072     JJ = jj + ii[i];
2073     bd = 0;
2074     for (j = 0; j < nz; j++) {
2075       if (*JJ >= i + rstart) break;
2076       JJ++;
2077       bd++;
2078     }
2079     d = 0;
2080     for (; j < nz; j++) {
2081       if (*JJ++ >= cend) break;
2082       d++;
2083     }
2084     d_nnz[i] = d;
2085     o_nnz[i] = nz - d - bd;
2086     nz       = nz - bd;
2087     nz_max   = PetscMax(nz_max, nz);
2088   }
2089   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
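  /* the CSR input may include the lower triangle; drop those entries during insertion instead of raising an error */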
2090   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
2091   PetscCall(PetscFree2(d_nnz, o_nnz));
2092 
2093   values = (PetscScalar *)V;
2094   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2095   for (i = 0; i < m; i++) {
2096     PetscInt        row   = i + rstart;
2097     PetscInt        ncols = ii[i + 1] - ii[i];
2098     const PetscInt *icols = jj + ii[i];
2099     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2100       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2101       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2102     } else { /* block ordering does not match so we can only insert one block at a time. */
2103       PetscInt j;
2104       for (j = 0; j < ncols; j++) {
2105         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2106         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2107       }
2108     }
2109   }
2110 
2111   if (!V) PetscCall(PetscFree(values));
2112   nooffprocentries    = B->nooffprocentries;
2113   B->nooffprocentries = PETSC_TRUE;
2114   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2115   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2116   B->nooffprocentries = nooffprocentries;
2117 
2118   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2119   PetscFunctionReturn(PETSC_SUCCESS);
2120 }
2121 
2122 /*MC
2123    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2124    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2125    the matrix is stored.
2126 
2127    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2128    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
2129 
2130    Options Database Key:
2131 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2132 
2133    Level: beginner
2134 
2135    Note:
2136      The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
2137      diagonal portion of the matrix on each process must be less than or equal to the number of columns.
2138 
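   Example Usage:
   A minimal creation sketch; the global size N, block size, and preallocation counts below are
   illustrative placeholders, not recommendations:
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD, &A);
   MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N);
   MatSetType(A, MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A, 2, 5, NULL, 2, NULL);
   MatSetOption(A, MAT_HERMITIAN, PETSC_TRUE); /* only if a Hermitian (complex) matrix is wanted */
.ve
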
2139 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2140 M*/
2141 
2142 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2143 {
2144   Mat_MPISBAIJ *b;
2145   PetscBool     flg = PETSC_FALSE;
2146 
2147   PetscFunctionBegin;
2148   PetscCall(PetscNew(&b));
2149   B->data   = (void *)b;
2150   B->ops[0] = MatOps_Values;
2151 
2152   B->ops->destroy = MatDestroy_MPISBAIJ;
2153   B->ops->view    = MatView_MPISBAIJ;
2154   B->assembled    = PETSC_FALSE;
2155   B->insertmode   = NOT_SET_VALUES;
2156 
2157   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2158   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2159 
2160   /* build local table of row and column ownerships */
2161   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2162 
2163   /* build cache for off array entries formed */
2164   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2165 
2166   b->donotstash  = PETSC_FALSE;
2167   b->colmap      = NULL;
2168   b->garray      = NULL;
2169   b->roworiented = PETSC_TRUE;
2170 
2171   /* stuff used in block assembly */
2172   b->barray = NULL;
2173 
2174   /* stuff used for matrix vector multiply */
2175   b->lvec    = NULL;
2176   b->Mvctx   = NULL;
2177   b->slvec0  = NULL;
2178   b->slvec0b = NULL;
2179   b->slvec1  = NULL;
2180   b->slvec1a = NULL;
2181   b->slvec1b = NULL;
2182   b->sMvctx  = NULL;
2183 
2184   /* stuff for MatGetRow() */
2185   b->rowindices   = NULL;
2186   b->rowvalues    = NULL;
2187   b->getrowactive = PETSC_FALSE;
2188 
2189   /* hash table stuff */
2190   b->ht           = NULL;
2191   b->hd           = NULL;
2192   b->ht_size      = 0;
2193   b->ht_flag      = PETSC_FALSE;
2194   b->ht_fact      = 0;
2195   b->ht_total_ct  = 0;
2196   b->ht_insert_ct = 0;
2197 
2198   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2199   b->ijonly = PETSC_FALSE;
2200 
2201   b->in_loc = NULL;
2202   b->v_loc  = NULL;
2203   b->n_loc  = 0;
2204 
2205   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
2206   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
2207   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
2208   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2209 #if defined(PETSC_HAVE_ELEMENTAL)
2210   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
2211 #endif
2212 #if defined(PETSC_HAVE_SCALAPACK)
2213   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2214 #endif
2215   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
2216   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2217 
2218   B->symmetric                   = PETSC_BOOL3_TRUE;
2219   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2220   B->symmetry_eternal            = PETSC_TRUE;
2221   B->structural_symmetry_eternal = PETSC_TRUE;
2222 #if defined(PETSC_USE_COMPLEX)
2223   B->hermitian = PETSC_BOOL3_FALSE;
2224 #else
2225   B->hermitian = PETSC_BOOL3_TRUE;
2226 #endif
2227 
2228   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2229   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading an MPISBAIJ matrix", "Mat");
2230   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
2231   if (flg) {
2232     PetscReal fact = 1.39;
2233     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2234     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2235     if (fact <= 1.0) fact = 1.39;
2236     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2237     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2238   }
2239   PetscOptionsEnd();
2240   PetscFunctionReturn(PETSC_SUCCESS);
2241 }
2242 
2243 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2244 /*MC
2245    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2246 
2247    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2248    and `MATMPISBAIJ` otherwise.
2249 
2250    Options Database Key:
2251 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2252 
2253   Level: beginner
2254 
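   Example Usage:
   A minimal sketch that defers the concrete type to the options database (run with -mat_type sbaij);
   the global size N is an illustrative placeholder:
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD, &A);
   MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N);
   MatSetFromOptions(A);
   MatSetUp(A);
.ve
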
2255 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2256 M*/
2257 
2258 /*@C
2259   MatMPISBAIJSetPreallocation - For good matrix assembly performance
2260   the user should preallocate the matrix storage by setting the parameters
2261   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
2262   performance can be increased by more than a factor of 50.
2263 
2264   Collective
2265 
2266   Input Parameters:
2267 + B     - the matrix
2268 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2269           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2270 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2271           submatrix  (same for all local rows)
2272 . d_nnz - array containing the number of block nonzeros in the various block rows
2273           in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2274           (possibly different for each block row) or `NULL`.  If you plan to factor the matrix you must leave room
2275           for the diagonal entry and set a value even if it is zero.
2276 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2277           submatrix (same for all local rows).
2278 - o_nnz - array containing the number of nonzeros in the various block rows of the
2279           off-diagonal portion of the local submatrix that is right of the diagonal
2280           (possibly different for each block row) or `NULL`.
2281 
2282   Options Database Keys:
2283 + -mat_no_unroll  - uses code that does not unroll the loops in the
2284                     block calculations (much slower)
2285 - -mat_block_size - size of the blocks to use
2286 
2287   Level: intermediate
2288 
2289   Notes:
2290 
2291   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2292   then it must be used on all processors that share the object for that argument.
2293 
2294   If the *_nnz parameter is given then the *_nz parameter is ignored
2295 
2296   Storage Information:
2297   For a square global matrix we define each processor's diagonal portion
2298   to be its local rows and the corresponding columns (a square submatrix);
2299   each processor's off-diagonal portion encompasses the remainder of the
2300   local matrix (a rectangular submatrix).
2301 
2302   The user can specify preallocated storage for the diagonal part of
2303   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
2304   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2305   memory allocation.  Likewise, specify preallocated storage for the
2306   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2307 
2308   You can call `MatGetInfo()` to get information on how effective the preallocation was;
2309   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded;
2310   You can also run with the option `-info` and look for messages with the string
2311   malloc in them to see if additional memory allocation was needed.
2312 
2313   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2314   the figure below we depict these three local rows and all columns (0-11).
2315 
2316 .vb
2317            0 1 2 3 4 5 6 7 8 9 10 11
2318           --------------------------
2319    row 3  |. . . d d d o o o o  o  o
2320    row 4  |. . . d d d o o o o  o  o
2321    row 5  |. . . d d d o o o o  o  o
2322           --------------------------
2323 .ve
2324 
2325   Thus, any entries in the d locations are stored in the d (diagonal)
2326   submatrix, and any entries in the o locations are stored in the
2327   o (off-diagonal) submatrix.  Note that the d matrix is stored in
2328   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2329 
2330   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2331   plus the diagonal part of the d matrix,
2332   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2333 
2334   In general, for PDE problems in which most nonzeros are near the diagonal,
2335   one expects `d_nz` >> `o_nz`.
2336 
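  Example Usage:
  For the three rows depicted above with bs = 1, assuming every d and o location on or above the
  diagonal is nonzero, a per-row preallocation sketch is
.vb
   PetscInt d_nnz[] = {3, 2, 1}; /* upper triangular part of the d block for rows 3, 4, 5 */
   PetscInt o_nnz[] = {6, 6, 6}; /* columns 6-11 of the o block */
   MatMPISBAIJSetPreallocation(B, 1, 0, d_nnz, 0, o_nnz);
.ve
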
2337 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2338 @*/
2339 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2340 {
2341   PetscFunctionBegin;
2342   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2343   PetscValidType(B, 1);
2344   PetscValidLogicalCollectiveInt(B, bs, 2);
2345   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2346   PetscFunctionReturn(PETSC_SUCCESS);
2347 }
2348 
2349 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2350 /*@C
2351   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2352   (block compressed row).  For good matrix assembly performance
2353   the user should preallocate the matrix storage by setting the parameters
2354   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
2355 
2356   Collective
2357 
2358   Input Parameters:
2359 + comm  - MPI communicator
2360 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2361           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2362 . m     - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
2363           This value should be the same as the local size used in creating the
2364           y vector for the matrix-vector product y = Ax.
2365 . n     - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
2366           This value should be the same as the local size used in creating the
2367           x vector for the matrix-vector product y = Ax.
2368 . M     - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
2369 . N     - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
2370 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2371           submatrix (same for all local rows)
2372 . d_nnz - array containing the number of block nonzeros in the various block rows
2373           in the upper triangular portion of the diagonal portion of the local submatrix
2374           (possibly different for each block row) or `NULL`.
2375           If you plan to factor the matrix you must leave room for the diagonal entry and
2376           set its value even if it is zero.
2377 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2378           submatrix (same for all local rows).
2379 - o_nnz - array containing the number of nonzeros in the various block rows of the
2380           off-diagonal portion of the local submatrix (possibly different for
2381           each block row) or `NULL`.
2382 
2383   Output Parameter:
2384 . A - the matrix
2385 
2386   Options Database Keys:
2387 + -mat_no_unroll  - uses code that does not unroll the loops in the
2388                     block calculations (much slower)
2389 . -mat_block_size - size of the blocks to use
2390 - -mat_mpi        - use the parallel matrix data structures even on one processor
2391                     (defaults to using SeqSBAIJ format on one processor)
2392 
2393   Level: intermediate
2394 
2395   Notes:
2396   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2397   MatXXXXSetPreallocation() paradigm instead of this routine directly.
2398   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2399 
2400   The number of rows and columns must be divisible by blocksize.
2401   This matrix type does not support complex Hermitian operations.
2402 
2403   The user MUST specify either the local or global matrix dimensions
2404   (possibly both).
2405 
2406   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2407   then it must be used on all processors that share the object for that argument.
2408 
2409   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
2410   `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
2411 
2412   If the *_nnz parameter is given then the *_nz parameter is ignored
2413 
2414   Storage Information:
2415   For a square global matrix we define each processor's diagonal portion
2416   to be its local rows and the corresponding columns (a square submatrix);
2417   each processor's off-diagonal portion encompasses the remainder of the
2418   local matrix (a rectangular submatrix).
2419 
2420   The user can specify preallocated storage for the diagonal part of
2421   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
2422   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2423   memory allocation. Likewise, specify preallocated storage for the
2424   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2425 
2426   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2427   the figure below we depict these three local rows and all columns (0-11).
2428 
2429 .vb
2430            0 1 2 3 4 5 6 7 8 9 10 11
2431           --------------------------
2432    row 3  |. . . d d d o o o o  o  o
2433    row 4  |. . . d d d o o o o  o  o
2434    row 5  |. . . d d d o o o o  o  o
2435           --------------------------
2436 .ve
2437 
2438   Thus, any entries in the d locations are stored in the d (diagonal)
2439   submatrix, and any entries in the o locations are stored in the
2440   o (off-diagonal) submatrix. Note that the d matrix is stored in
2441   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2442 
2443   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2444   plus the diagonal part of the d matrix,
2445   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2446   In general, for PDE problems in which most nonzeros are near the diagonal,
2447   one expects `d_nz` >> `o_nz`.
2448 
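  Example Usage:
  A minimal sketch with block size 2 and uniform per-block-row preallocation; M and the counts are
  illustrative placeholders:
.vb
   Mat A;
   MatCreateSBAIJ(PETSC_COMM_WORLD, 2, PETSC_DECIDE, PETSC_DECIDE, M, M, 3, NULL, 1, NULL, &A);
.ve
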
2449 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`,
2450           `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
2451 @*/
2452 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2453 {
2454   PetscMPIInt size;
2455 
2456   PetscFunctionBegin;
2457   PetscCall(MatCreate(comm, A));
2458   PetscCall(MatSetSizes(*A, m, n, M, N));
2459   PetscCallMPI(MPI_Comm_size(comm, &size));
2460   if (size > 1) {
2461     PetscCall(MatSetType(*A, MATMPISBAIJ));
2462     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2463   } else {
2464     PetscCall(MatSetType(*A, MATSEQSBAIJ));
2465     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2466   }
2467   PetscFunctionReturn(PETSC_SUCCESS);
2468 }
2469 
2470 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2471 {
2472   Mat           mat;
2473   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2474   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2475   PetscScalar  *array;
2476 
2477   PetscFunctionBegin;
2478   *newmat = NULL;
2479 
2480   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2481   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2482   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2483   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2484   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2485 
2486   if (matin->hash_active) {
2487     PetscCall(MatSetUp(mat));
2488   } else {
2489     mat->factortype   = matin->factortype;
2490     mat->preallocated = PETSC_TRUE;
2491     mat->assembled    = PETSC_TRUE;
2492     mat->insertmode   = NOT_SET_VALUES;
2493 
2494     a      = (Mat_MPISBAIJ *)mat->data;
2495     a->bs2 = oldmat->bs2;
2496     a->mbs = oldmat->mbs;
2497     a->nbs = oldmat->nbs;
2498     a->Mbs = oldmat->Mbs;
2499     a->Nbs = oldmat->Nbs;
2500 
2501     a->size         = oldmat->size;
2502     a->rank         = oldmat->rank;
2503     a->donotstash   = oldmat->donotstash;
2504     a->roworiented  = oldmat->roworiented;
2505     a->rowindices   = NULL;
2506     a->rowvalues    = NULL;
2507     a->getrowactive = PETSC_FALSE;
2508     a->barray       = NULL;
2509     a->rstartbs     = oldmat->rstartbs;
2510     a->rendbs       = oldmat->rendbs;
2511     a->cstartbs     = oldmat->cstartbs;
2512     a->cendbs       = oldmat->cendbs;
2513 
2514     /* hash table stuff */
2515     a->ht           = NULL;
2516     a->hd           = NULL;
2517     a->ht_size      = 0;
2518     a->ht_flag      = oldmat->ht_flag;
2519     a->ht_fact      = oldmat->ht_fact;
2520     a->ht_total_ct  = 0;
2521     a->ht_insert_ct = 0;
2522 
2523     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2524     if (oldmat->colmap) {
2525 #if defined(PETSC_USE_CTABLE)
2526       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2527 #else
2528       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
2529       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2530 #endif
2531     } else a->colmap = NULL;
2532 
2533     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
2534       PetscCall(PetscMalloc1(len, &a->garray));
2535       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2536     } else a->garray = NULL;
2537 
2538     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
2539     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
2540     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
2541 
2542     PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
2543     PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2544 
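    /* slvec1a, slvec1b, and slvec0b are views into slvec1/slvec0: the first bs*mbs entries hold the
       local part and the tail holds the ghost part, so no additional storage is allocated */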
2545     PetscCall(VecGetLocalSize(a->slvec1, &nt));
2546     PetscCall(VecGetArray(a->slvec1, &array));
2547     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
2548     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
2549     PetscCall(VecRestoreArray(a->slvec1, &array));
2550     PetscCall(VecGetArray(a->slvec0, &array));
2551     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
2552     PetscCall(VecRestoreArray(a->slvec0, &array));
2553 
2554     /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not written yet, so instead share the scatter by taking a reference (the lazy trick): */
2555     PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2556     a->sMvctx = oldmat->sMvctx;
2557 
2558     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2559     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
2560   }
2561   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2562   *newmat = mat;
2563   PetscFunctionReturn(PETSC_SUCCESS);
2564 }
2565 
2566 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2567 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2568 
2569 static PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2570 {
2571   PetscBool isbinary;
2572 
2573   PetscFunctionBegin;
2574   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2575   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2576   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2577   PetscFunctionReturn(PETSC_SUCCESS);
2578 }
2579 
2580 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2581 {
2582   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2583   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)a->B->data;
2584   PetscReal     atmp;
2585   PetscReal    *work, *svalues, *rvalues;
2586   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
2587   PetscMPIInt   rank, size;
2588   PetscInt     *rowners_bs, dest, count, source;
2589   PetscScalar  *va;
2590   MatScalar    *ba;
2591   MPI_Status    stat;
2592 
2593   PetscFunctionBegin;
2594   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
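  /* Only the upper triangle is stored: the diagonal block A is handled directly below, while by symmetry the
     off-diagonal block B also contributes to rows owned by later processes; those contributions are accumulated
     in work[] and then sent to their owning processes */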
2595   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
2596   PetscCall(VecGetArray(v, &va));
2597 
2598   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2599   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2600 
2601   bs  = A->rmap->bs;
2602   mbs = a->mbs;
2603   Mbs = a->Mbs;
2604   ba  = b->a;
2605   bi  = b->i;
2606   bj  = b->j;
2607 
2608   /* find ownerships */
2609   rowners_bs = A->rmap->range;
2610 
2611   /* each proc creates an array to be distributed */
2612   PetscCall(PetscCalloc1(bs * Mbs, &work));
2613 
2614   /* row_max for B */
2615   if (rank != size - 1) {
2616     for (i = 0; i < mbs; i++) {
2617       ncols = bi[1] - bi[0];
2618       bi++;
2619       brow = bs * i;
2620       for (j = 0; j < ncols; j++) {
2621         bcol = bs * (*bj);
2622         for (kcol = 0; kcol < bs; kcol++) {
2623           col = bcol + kcol;           /* local col index */
2624           col += rowners_bs[rank + 1]; /* global col index */
2625           for (krow = 0; krow < bs; krow++) {
2626             atmp = PetscAbsScalar(*ba);
2627             ba++;
2628             row = brow + krow; /* local row index */
2629             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2630             if (work[col] < atmp) work[col] = atmp;
2631           }
2632         }
2633         bj++;
2634       }
2635     }
2636 
2637     /* send values to its owners */
2638     for (dest = rank + 1; dest < size; dest++) {
2639       svalues = work + rowners_bs[dest];
2640       count   = rowners_bs[dest + 1] - rowners_bs[dest];
2641       PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2642     }
2643   }
2644 
2645   /* receive values */
2646   if (rank) {
2647     rvalues = work;
2648     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2649     for (source = 0; source < rank; source++) {
2650       PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2651       /* process values */
2652       for (i = 0; i < count; i++) {
2653         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2654       }
2655     }
2656   }
2657 
2658   PetscCall(VecRestoreArray(v, &va));
2659   PetscCall(PetscFree(work));
2660   PetscFunctionReturn(PETSC_SUCCESS);
2661 }
2662 
2663 static PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2664 {
2665   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2666   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
2667   PetscScalar       *x, *ptr, *from;
2668   Vec                bb1;
2669   const PetscScalar *b;
2670 
2671   PetscFunctionBegin;
2672   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2673   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2674 
2675   if (flag == SOR_APPLY_UPPER) {
2676     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2677     PetscFunctionReturn(PETSC_SUCCESS);
2678   }
2679 
2680   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2681     if (flag & SOR_ZERO_INITIAL_GUESS) {
2682       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2683       its--;
2684     }
2685 
2686     PetscCall(VecDuplicate(bb, &bb1));
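    /* each iteration assembles bb1 = bb - (B + B^T)*xx via the shared slvec0/slvec1 work vectors, removing the
       off-process couplings of the symmetric matrix, then runs a symmetric sweep with the local diagonal block A */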
2687     while (its--) {
2688       /* lower triangular part: slvec0b = - B^T*xx */
2689       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2690 
2691       /* copy xx into slvec0a */
2692       PetscCall(VecGetArray(mat->slvec0, &ptr));
2693       PetscCall(VecGetArray(xx, &x));
2694       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
2695       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2696 
2697       PetscCall(VecScale(mat->slvec0, -1.0));
2698 
2699       /* copy bb into slvec1a */
2700       PetscCall(VecGetArray(mat->slvec1, &ptr));
2701       PetscCall(VecGetArrayRead(bb, &b));
2702       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
2703       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2704 
2705       /* set slvec1b = 0 */
2706       PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2707       PetscCall(VecZeroEntries(mat->slvec1b));
2708 
2709       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2710       PetscCall(VecRestoreArray(xx, &x));
2711       PetscCall(VecRestoreArrayRead(bb, &b));
2712       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2713 
2714       /* upper triangular part: bb1 = slvec1a + B*slvec1b */
2715       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2716 
2717       /* local diagonal sweep */
2718       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2719     }
2720     PetscCall(VecDestroy(&bb1));
2721   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2722     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2723   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2724     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2725   } else if (flag & SOR_EISENSTAT) {
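    /* Eisenstat's trick: apply the SSOR-preconditioned operator with one local backward sweep, one local forward
       sweep, and a single multiply with the off-diagonal block */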
2726     Vec                xx1;
2727     PetscBool          hasop;
2728     const PetscScalar *diag;
2729     PetscScalar       *sl, scale = (omega - 2.0) / omega;
2730     PetscInt           i, n;
2731 
2732     if (!mat->xx1) {
2733       PetscCall(VecDuplicate(bb, &mat->xx1));
2734       PetscCall(VecDuplicate(bb, &mat->bb1));
2735     }
2736     xx1 = mat->xx1;
2737     bb1 = mat->bb1;
2738 
2739     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2740 
2741     if (!mat->diag) {
2742       /* note: this cached diagonal is wrong if the same matrix is later given new nonzero values */
2743       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
2744       PetscCall(MatGetDiagonal(matin, mat->diag));
2745     }
2746     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2747 
2748     if (hasop) {
2749       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
2750       PetscCall(VecAYPX(mat->slvec1a, scale, bb));
2751     } else {
2752       /*
2753           These two lines are replaced by code that may be a bit faster for a good compiler
2754       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2755       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2756       */
2757       PetscCall(VecGetArray(mat->slvec1a, &sl));
2758       PetscCall(VecGetArrayRead(mat->diag, &diag));
2759       PetscCall(VecGetArrayRead(bb, &b));
2760       PetscCall(VecGetArray(xx, &x));
2761       PetscCall(VecGetLocalSize(xx, &n));
2762       if (omega == 1.0) {
2763         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
2764         PetscCall(PetscLogFlops(2.0 * n));
2765       } else {
2766         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
2767         PetscCall(PetscLogFlops(3.0 * n));
2768       }
2769       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
2770       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
2771       PetscCall(VecRestoreArrayRead(bb, &b));
2772       PetscCall(VecRestoreArray(xx, &x));
2773     }
2774 
2775     /* multiply off-diagonal portion of matrix */
2776     PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2777     PetscCall(VecZeroEntries(mat->slvec1b));
2778     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2779     PetscCall(VecGetArray(mat->slvec0, &from));
2780     PetscCall(VecGetArray(xx, &x));
2781     PetscCall(PetscArraycpy(from, x, bs * mbs));
2782     PetscCall(VecRestoreArray(mat->slvec0, &from));
2783     PetscCall(VecRestoreArray(xx, &x));
2784     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2785     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2786     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2787 
2788     /* local sweep */
2789     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
2790     PetscCall(VecAXPY(xx, 1.0, xx1));
2791   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
2792   PetscFunctionReturn(PETSC_SUCCESS);
2793 }
2794 
2795 /*@
2796   MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain the local rows in standard CSR format.
2797 
2798   Collective
2799 
2800   Input Parameters:
2801 + comm - MPI communicator
2802 . bs   - the block size, only a block size of 1 is supported
2803 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
2804 . n    - This value should be the same as the local size used in creating the
2805          x vector for the matrix-vector product $ y = Ax $ (or `PETSC_DECIDE` to have it
2806          calculated if `N` is given). For square matrices `n` is almost always `m`.
2807 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2808 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2809 . i    - row indices; that is, i[0] = 0 and i[row] = i[row-1] + the number of block elements in that block row of the matrix
2810 . j    - column indices
2811 - a    - matrix values
2812 
2813   Output Parameter:
2814 . mat - the matrix
2815 
2816   Level: intermediate
2817 
2818   Notes:
2819   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
2820   thus you CANNOT change the matrix entries by changing the values of `a` after you have
2821   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
2822 
2823   The `i` and `j` indices are 0-based, and the `i` entries are offsets into the local `j` array.
2824 
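  Example Usage:
  A minimal sketch, assuming two MPI ranks that each own two rows of a 4x4 symmetric matrix with a `bs` of 1; the
  values are illustrative, and each rank passes only its upper-triangular entries (global column >= global row).
.vb
  Mat         A;
  PetscMPIInt rank;
  PetscInt    rstart;

  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  rstart = 2 * rank; /* first global row owned by this rank */
  {
    PetscInt    i[3] = {0, 2, 3};                        /* CSR row offsets for the two local rows */
    PetscInt    j[3] = {rstart, rstart + 1, rstart + 1}; /* global column indices, upper triangle only */
    PetscScalar a[3] = {4.0, -1.0, 4.0};                 /* matrix values */

    PetscCall(MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A));
  }
  PetscCall(MatDestroy(&A));
.ve
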
2825 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2826           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatMPISBAIJSetPreallocationCSR()`
2827 @*/
2828 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2829 {
2830   PetscFunctionBegin;
2831   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
2832   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
2833   PetscCall(MatCreate(comm, mat));
2834   PetscCall(MatSetSizes(*mat, m, n, M, N));
2835   PetscCall(MatSetType(*mat, MATMPISBAIJ));
2836   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
2837   PetscFunctionReturn(PETSC_SUCCESS);
2838 }
2839 
2840 /*@C
2841   MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2842 
2843   Collective
2844 
2845   Input Parameters:
2846 + B  - the matrix
2847 . bs - the block size
2848 . i  - the indices into `j` for the start of each local row (indices start with zero)
2849 . j  - the column indices for each local row (indices start with zero) these must be sorted for each row
2850 - v  - optional values in the matrix, pass `NULL` if not provided
2851 
2852   Level: advanced
2853 
2854   Notes:
2855   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2856   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2857   called this routine.
2858 
2859   Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries
2860   and usually the numerical values as well
2861 
2862   Any entries passed in that are below the diagonal are ignored
2863 
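  Example Usage:
  A minimal sketch, assuming each rank owns one block row of a block-diagonal matrix with a `bs` of 2; the block
  indices and values are illustrative only.
.vb
  Mat         B;
  PetscMPIInt rank;

  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
  PetscCall(MatSetSizes(B, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATMPISBAIJ));
  {
    PetscInt    i[2] = {0, 1};                 /* one block in the single local block row */
    PetscInt    j[1] = {rank};                 /* its global block column: the diagonal block */
    PetscScalar v[4] = {4.0, -1.0, -1.0, 4.0}; /* one bs*bs block of values */

    PetscCall(MatMPISBAIJSetPreallocationCSR(B, 2, i, j, v));
  }
  PetscCall(MatDestroy(&B));
.ve
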
2864 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
2865           `MatCreateMPISBAIJWithArrays()`
2866 @*/
2867 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2868 {
2869   PetscFunctionBegin;
2870   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2871   PetscFunctionReturn(PETSC_SUCCESS);
2872 }
2873 
2874 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2875 {
2876   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
2877   PetscInt    *indx;
2878   PetscScalar *values;
2879 
2880   PetscFunctionBegin;
2881   PetscCall(MatGetSize(inmat, &m, &N));
2882   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2883     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2884     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
2885     PetscInt     *bindx, rmax = a->rmax, j;
2886     PetscMPIInt   rank, size;
2887 
2888     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2889     mbs = m / bs;
2890     Nbs = N / cbs;
2891     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2892     nbs = n / cbs;
2893 
2894     PetscCall(PetscMalloc1(rmax, &bindx));
2895     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2896 
2897     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2898     PetscCallMPI(MPI_Comm_size(comm, &size));
2899     if (rank == size - 1) {
2900       /* Check sum(nbs) = Nbs */
2901       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2902     }
2903 
2904     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2905     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2906     for (i = 0; i < mbs; i++) {
2907       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
2908       nnz = nnz / bs;
2909       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2910       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
2911       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
2912     }
2913     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2914     PetscCall(PetscFree(bindx));
2915 
2916     PetscCall(MatCreate(comm, outmat));
2917     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
2918     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
2919     PetscCall(MatSetType(*outmat, MATSBAIJ));
2920     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
2921     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2922     MatPreallocateEnd(dnz, onz);
2923   }
2924 
2925   /* numeric phase */
2926   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2927   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
2928 
2929   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2930   for (i = 0; i < m; i++) {
2931     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2932     Ii = i + rstart;
2933     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2934     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2935   }
2936   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2937   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
2938   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
2939   PetscFunctionReturn(PETSC_SUCCESS);
2940 }
2941