#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));
  PetscCall(VecScatterDestroy(&baij->sMvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree(baij->hd));
  PetscCall(VecDestroy(&baij->diag));
  PetscCall(VecDestroy(&baij->bb1));
  PetscCall(VecDestroy(&baij->xx1));
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(baij->setvaluescopy));
#endif
  PetscCall(PetscFree(baij->in_loc));
  PetscCall(PetscFree(baij->v_loc));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
#define TYPE SBAIJ
#define TYPE_SBAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_SBAIJ

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
#endif

/* This could be moved to matimpl.h */
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat       preallocator;
  PetscInt  r, rstart, rend;
  PetscInt  bs, i, m, n, M, N;
  PetscBool cong = PETSC_TRUE;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveInt(B, nm, 2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
    PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B, fill, 5);
  PetscCall(MatGetBlockSize(B, &bs));
  PetscCall(MatGetSize(B, &M, &N));
  PetscCall(MatGetLocalSize(B, &m, &n));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
  PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
  PetscCall(MatSetBlockSize(preallocator, bs));
  PetscCall(MatSetSizes(preallocator, m, n, M, N));
  PetscCall(MatSetUp(preallocator));
  PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
  for (r = rstart; r < rend; ++r) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
      PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
      if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
    }
  }
  PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
  PetscCall(MatDestroy(&preallocator));
  PetscFunctionReturn(PETSC_SUCCESS);
}
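/*
   Usage sketch, not part of this file: the MATPREALLOCATOR two-pass pattern used by
   MatPreallocateWithMats_Private() above can be applied directly in user code. The
   names comm, n, N, and B below are placeholders. The first pass replays the intended
   MatSetValues() calls into the preallocator; the second pass fills the real matrix.
.vb
   Mat pre;
   PetscCall(MatCreate(comm, &pre));
   PetscCall(MatSetType(pre, MATPREALLOCATOR));
   PetscCall(MatSetSizes(pre, n, n, N, N));
   PetscCall(MatSetUp(pre));
   ... same MatSetValues() calls as the final assembly ...
   PetscCall(MatAssemblyBegin(pre, MAT_FINAL_ASSEMBLY));
   PetscCall(MatAssemblyEnd(pre, MAT_FINAL_ASSEMBLY));
   PetscCall(MatPreallocatorPreallocate(pre, PETSC_TRUE, B));
   PetscCall(MatDestroy(&pre));
.ve
*/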

PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat      B;
  PetscInt r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE, isdense;
    PetscInt  bs;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, newtype));
    PetscCall(MatGetBlockSize(A, &bs));
    PetscCall(MatSetBlockSize(B, bs));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
    if (!isdense) {
      PetscCall(MatGetRowUpperTriangular(A));
      PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
      PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      PetscCall(MatSetUp(B));
    }
  } else {
    B = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
    PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian == PETSC_BOOL3_TRUE) {
      PetscInt i;
      for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
    } else {
      PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
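/*
   Usage sketch, not part of this file: the basic converter above is normally reached
   through the public MatConvert() interface. Assuming A is an assembled MATMPISBAIJ
   matrix, a full (both triangles) AIJ copy is obtained with
.vb
   Mat Aij;
   PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &Aij));
   PetscCall(MatDestroy(&Aij));
.ve
   With MAT_INPLACE_MATRIX no second matrix survives: the header of A itself is
   replaced via MatHeaderReplace() as in the code above.
*/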

static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)

#define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)
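/*
   The two macros above locate the block column with a truncated binary search followed
   by a short linear scan. A minimal standalone sketch of that search (a hypothetical
   helper, not used by the library; rp is the sorted block-column array of one row):
.vb
   static PetscInt FindBlockCol(const PetscInt *rp, PetscInt nrow, PetscInt bcol)
   {
     PetscInt low = 0, high = nrow, t, i;
     while (high - low > 3) {
       t = (low + high) / 2;
       if (rp[t] > bcol) high = t;
       else low = t;
     }
     for (i = low; i < high; i++)
       if (rp[i] >= bcol) break;
     return i;
   }
.ve
   The returned index either already holds bcol (the value is updated in place) or is
   the position at which the macros shift the later entries up and insert a new block.
*/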

/* Only add/insert blocks a(i,j) with i <= j.
   Any a(i,j) with i > j input by the user is ignored or generates an error
*/
static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  MatScalar     value;
  PetscBool     roworiented = baij->roworiented;
  PetscInt      i, j, row, col;
  PetscInt      rstart_orig = mat->rmap->rstart;
  PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat           A     = baij->A;
  Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)A->data;
  PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar    *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)B->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  /* for stash */
  PetscInt   n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n, &baij->in_loc));
      PetscCall(PetscMalloc1(n, &baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j = 0; j < n; j++) {
        if (im[i] / bs > in[j] / bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row / bs;
          bcol = col / bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ *)baij->A->data)->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else col += in[j] % bs;
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off processor entry */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j = 0; j < n; j++) {
          if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i * n + j];
          } else {
            v_loc[n_loc] = v[j * m + i];
          }
          n_loc++;
        }
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
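/*
   Usage sketch, not from this file: with the default options an attempt to set a
   strictly lower-triangular (by block) entry is an error, so callers that naturally
   generate both triangles enable the ignore option first. Assuming mat is a
   MATMPISBAIJ, v is a scalar, and i < j are global indices in different blocks:
.vb
   PetscCall(MatSetOption(mat, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
   PetscCall(MatSetValue(mat, i, j, v, ADD_VALUES));
   PetscCall(MatSetValue(mat, j, i, v, ADD_VALUES));
.ve
   The first MatSetValue() is stored; the second is silently dropped by the checks at
   the top of the column loop above.
*/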

static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ = *value++;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This routine is exactly duplicated in mpibaij.c
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n - 1) * bs;
  } else {
    stepval = (m - 1) * bs;
  }
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* if there is only one block column (row-oriented) or one block row (column-oriented), the blocks in v are already contiguous and no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = col < 1 ? -1 : (col - 1) / bs;
#else
            col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
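/*
   Usage sketch, not from this file: here im[] and in[] are BLOCK indices and v holds
   bs*bs values per block. Assuming bs == 2 and block row 0 <= block column 3:
.vb
   PetscInt    ib = 0, jb = 3;
   PetscScalar blk[4] = {1.0, 0.5, 0.5, 2.0};
   PetscCall(MatSetValuesBlocked(mat, 1, &ib, 1, &jb, blk, INSERT_VALUES));
.ve
   By default blk is interpreted row-oriented; MatSetOption(mat, MAT_ROW_ORIENTED,
   PETSC_FALSE) selects the column-oriented layout handled by the stepval logic above.
*/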

static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscReal     sum[2], *lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, norm));
  } else {
    if (type == NORM_FROBENIUS) {
      PetscCall(PetscMalloc1(2, &lnorm2));
      PetscCall(MatNorm(baij->A, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2++; /* previous slot now holds the square of norm(A) */
      PetscCall(MatNorm(baij->B, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2--; /* second slot now holds the square of norm(B) */
      PetscCallMPI(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
      Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
      PetscReal    *rsum, vabs;
      PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
      PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
      MatScalar    *v;

      PetscCall(PetscMalloc1(mat->cmap->N, &rsum));
      PetscCall(PetscArrayzero(rsum, mat->cmap->N));
      /* Amat */
      v  = amat->a;
      jj = amat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = amat->i[brow + 1] - amat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * (rstart + *jj);
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      /* Bmat */
      v  = bmat->a;
      jj = bmat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = bmat->i[brow + 1] - bmat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * garray[*jj];
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, rsum, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = 0.0;
      for (col = 0; col < mat->cmap->N; col++) {
        if (rsum[col] > *norm) *norm = rsum[col];
      }
      PetscCall(PetscFree(rsum));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
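/*
   Why the factor 2 in the Frobenius branch above: only the upper triangle is stored,
   so every entry of the off-process part B also represents its mirrored transpose
   entry, giving ||A||_F^2 = ||A_d||_F^2 + 2*||B||_F^2, i.e. sqrt(sum[0] + 2*sum[1]).
   The 1-/infinity-norm branch exploits the same symmetry: every stored value
   contributes to its column sum rsum[gcol + col], and values outside the (fully
   stored) diagonal block also contribute to the mirrored row sum rsum[grow + row];
   since the matrix is symmetric the 1-norm equals the infinity-norm, so a single
   max over rsum[] suffices.
*/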

static PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt      nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
  PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt     *row, *col;
  PetscBool     other_disassembled;
  PetscMPIInt   n;
  PetscBool     r1, r2, r3;
  MatScalar    *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily clear the row-oriented flags, and restore the original flags
       after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
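/*
   Usage sketch of the assembly protocol served by the two routines above (generic
   PETSc pattern, not code from this file): any process may set any row, and entries
   for rows owned elsewhere travel through mat->stash / mat->bstash.
.vb
   PetscCall(MatSetValues(mat, 1, &i, 1, &j, &v, ADD_VALUES));
   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
.ve
*/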

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
#include <petscdraw.h>
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat           A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Bloc->i;
    aj   = Bloc->j;
    a    = Bloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary

static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_USE_COMPLEX)
static PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
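/*
   How the two multiply routines above compute y = A*x without the lower triangle:
   slvec1a is first set to A_d*x (diagonal-block part) and slvec0 is packed as
   [x ; B^T x] (B^H x in the Hermitian variant). The ADD_VALUES scatter of slvec0
   into slvec1 = [slvec1a ; slvec1b] then simultaneously (a) delivers each rank's
   B^T x, i.e. the action of the unstored lower-triangular blocks, into the owning
   ranks' slvec1a and (b) gathers the needed ghost values of x into slvec1b. The
   final y = B*slvec1b + slvec1a adds the superdiagonal part.
*/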

#if PetscDefined(USE_COMPLEX)
static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
    Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt      max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
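/*
   Usage sketch, not from this file (assumes row is owned by this process; only one
   row may be active at a time, see the getrowactive check above):
.vb
   PetscInt           ncols;
   const PetscInt    *cols;
   const PetscScalar *vals;
   PetscCall(MatGetRow(mat, row, &ncols, &cols, &vals));
   PetscCall(MatRestoreRow(mat, row, &ncols, &cols, &vals));
.ve
   For SBAIJ, MatGetRowUpperTriangular() must be called beforehand (see
   MatGetRowUpperTriangular_MPISBAIJ() below) and only the stored upper-triangular
   part of the row is returned.
*/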
1264 
1265 static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1266 {
1267   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1268 
1269   PetscFunctionBegin;
1270   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1271   baij->getrowactive = PETSC_FALSE;
1272   PetscFunctionReturn(PETSC_SUCCESS);
1273 }
1274 
1275 static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1276 {
1277   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1278   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1279 
1280   PetscFunctionBegin;
1281   aA->getrow_utriangular = PETSC_TRUE;
1282   PetscFunctionReturn(PETSC_SUCCESS);
1283 }
1284 static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1285 {
1286   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1287   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1288 
1289   PetscFunctionBegin;
1290   aA->getrow_utriangular = PETSC_FALSE;
1291   PetscFunctionReturn(PETSC_SUCCESS);
1292 }
1293 
1294 static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1295 {
1296   PetscFunctionBegin;
1297   if (PetscDefined(USE_COMPLEX)) {
1298     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
1299 
1300     PetscCall(MatConjugate(a->A));
1301     PetscCall(MatConjugate(a->B));
1302   }
1303   PetscFunctionReturn(PETSC_SUCCESS);
1304 }
1305 
1306 static PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1307 {
1308   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1309 
1310   PetscFunctionBegin;
1311   PetscCall(MatRealPart(a->A));
1312   PetscCall(MatRealPart(a->B));
1313   PetscFunctionReturn(PETSC_SUCCESS);
1314 }
1315 
1316 static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1317 {
1318   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1319 
1320   PetscFunctionBegin;
1321   PetscCall(MatImaginaryPart(a->A));
1322   PetscCall(MatImaginaryPart(a->B));
1323   PetscFunctionReturn(PETSC_SUCCESS);
1324 }
1325 
1326 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1327    Input: isrow       - distributed(parallel),
1328           iscol_local - locally owned (seq)
1329 */
1330 static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1331 {
1332   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
1333   const PetscInt *ptr1, *ptr2;
1334 
1335   PetscFunctionBegin;
1336   *flg = PETSC_FALSE;
1337   PetscCall(ISGetLocalSize(isrow, &sz1));
1338   PetscCall(ISGetLocalSize(iscol_local, &sz2));
1339   if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS);
1340 
1341   PetscCall(ISGetIndices(isrow, &ptr1));
1342   PetscCall(ISGetIndices(iscol_local, &ptr2));
1343 
1344   PetscCall(PetscMalloc1(sz1, &a1));
1345   PetscCall(PetscMalloc1(sz2, &a2));
1346   PetscCall(PetscArraycpy(a1, ptr1, sz1));
1347   PetscCall(PetscArraycpy(a2, ptr2, sz2));
1348   PetscCall(PetscSortInt(sz1, a1));
1349   PetscCall(PetscSortInt(sz2, a2));
1350 
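  /* one forward pass over both sorted arrays: for each entry of a1, resume scanning a2 at the
     previous match and count how many entries of a1 also occur in a2 */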
1351   nmatch = 0;
1352   k      = 0;
1353   for (i = 0; i < sz1; i++) {
1354     for (j = k; j < sz2; j++) {
1355       if (a1[i] == a2[j]) {
1356         k = j;
1357         nmatch++;
1358         break;
1359       }
1360     }
1361   }
1362   PetscCall(ISRestoreIndices(isrow, &ptr1));
1363   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
1364   PetscCall(PetscFree(a1));
1365   PetscCall(PetscFree(a2));
1366   if (nmatch < sz1) {
1367     *flg = PETSC_FALSE;
1368   } else {
1369     *flg = PETSC_TRUE;
1370   }
1371   PetscFunctionReturn(PETSC_SUCCESS);
1372 }
1373 
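/* Only the upper triangle is stored in the symmetric format, so a general submatrix cannot be
   obtained from the BAIJ routine alone. Fast paths: if isrow equals the gathered iscol, or
   iscol covers every column, MatCreateSubMatrix_MPIBAIJ_Private() is used directly. Otherwise
   the result is assembled as mat(isrow,iscol) + mat(iscol,isrow)^T, which is why isrow and
   iscol must have an empty intersection in that case. */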
1374 static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1375 {
1376   Mat       C[2];
1377   IS        iscol_local, isrow_local;
1378   PetscInt  csize, csize_local, rsize;
1379   PetscBool isequal, issorted, isidentity = PETSC_FALSE;
1380 
1381   PetscFunctionBegin;
1382   PetscCall(ISGetLocalSize(iscol, &csize));
1383   PetscCall(ISGetLocalSize(isrow, &rsize));
1384   if (call == MAT_REUSE_MATRIX) {
1385     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1386     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1387   } else {
1388     PetscCall(ISAllGather(iscol, &iscol_local));
1389     PetscCall(ISSorted(iscol_local, &issorted));
1390     PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must be sorted");
1391   }
1392   PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
1393   if (!isequal) {
1394     PetscCall(ISGetLocalSize(iscol_local, &csize_local));
1395     isidentity = (PetscBool)(mat->cmap->N == csize_local);
1396     if (!isidentity) {
1397       if (call == MAT_REUSE_MATRIX) {
1398         PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather_other", (PetscObject *)&isrow_local));
1399         PetscCheck(isrow_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1400       } else {
1401         PetscCall(ISAllGather(isrow, &isrow_local));
1402         PetscCall(ISSorted(isrow_local, &issorted));
1403         PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, isrow must be sorted");
1404       }
1405     }
1406   }
1407   /* now call MatCreateSubMatrix_MPIBAIJ() */
1408   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, isequal || isidentity ? call : MAT_INITIAL_MATRIX, isequal || isidentity ? newmat : C, (PetscBool)(isequal || isidentity)));
1409   if (!isequal && !isidentity) {
1410     if (call == MAT_INITIAL_MATRIX) {
1411       IS       intersect;
1412       PetscInt ni;
1413 
1414       PetscCall(ISIntersect(isrow_local, iscol_local, &intersect));
1415       PetscCall(ISGetLocalSize(intersect, &ni));
1416       PetscCall(ISDestroy(&intersect));
1417       PetscCheck(ni == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create such a submatrix: for symmetric format, when requesting an off-diagonal submatrix, isrow and iscol should have an empty intersection (number of common indices is %" PetscInt_FMT ")", ni);
1418     }
1419     PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, iscol, isrow_local, rsize, MAT_INITIAL_MATRIX, C + 1, PETSC_FALSE));
1420     PetscCall(MatTranspose(C[1], MAT_INPLACE_MATRIX, C + 1));
1421     PetscCall(MatAXPY(C[0], 1.0, C[1], DIFFERENT_NONZERO_PATTERN));
1422     if (call == MAT_REUSE_MATRIX) PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
1423     else if (mat->rmap->bs == 1) PetscCall(MatConvert(C[0], MATAIJ, MAT_INITIAL_MATRIX, newmat));
1424     else PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
1425     PetscCall(MatDestroy(C));
1426     PetscCall(MatDestroy(C + 1));
1427   }
1428   if (call == MAT_INITIAL_MATRIX) {
1429     if (!isequal && !isidentity) {
1430       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather_other", (PetscObject)isrow_local));
1431       PetscCall(ISDestroy(&isrow_local));
1432     }
1433     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1434     PetscCall(ISDestroy(&iscol_local));
1435   }
1436   PetscFunctionReturn(PETSC_SUCCESS);
1437 }
1438 
1439 static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1440 {
1441   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1442 
1443   PetscFunctionBegin;
1444   PetscCall(MatZeroEntries(l->A));
1445   PetscCall(MatZeroEntries(l->B));
1446   PetscFunctionReturn(PETSC_SUCCESS);
1447 }
1448 
1449 static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1450 {
1451   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1452   Mat            A = a->A, B = a->B;
1453   PetscLogDouble isend[5], irecv[5];
1454 
1455   PetscFunctionBegin;
1456   info->block_size = (PetscReal)matin->rmap->bs;
1457 
1458   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1459 
1460   isend[0] = info->nz_used;
1461   isend[1] = info->nz_allocated;
1462   isend[2] = info->nz_unneeded;
1463   isend[3] = info->memory;
1464   isend[4] = info->mallocs;
1465 
1466   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1467 
1468   isend[0] += info->nz_used;
1469   isend[1] += info->nz_allocated;
1470   isend[2] += info->nz_unneeded;
1471   isend[3] += info->memory;
1472   isend[4] += info->mallocs;
1473   if (flag == MAT_LOCAL) {
1474     info->nz_used      = isend[0];
1475     info->nz_allocated = isend[1];
1476     info->nz_unneeded  = isend[2];
1477     info->memory       = isend[3];
1478     info->mallocs      = isend[4];
1479   } else if (flag == MAT_GLOBAL_MAX) {
1480     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1481 
1482     info->nz_used      = irecv[0];
1483     info->nz_allocated = irecv[1];
1484     info->nz_unneeded  = irecv[2];
1485     info->memory       = irecv[3];
1486     info->mallocs      = irecv[4];
1487   } else if (flag == MAT_GLOBAL_SUM) {
1488     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1489 
1490     info->nz_used      = irecv[0];
1491     info->nz_allocated = irecv[1];
1492     info->nz_unneeded  = irecv[2];
1493     info->memory       = irecv[3];
1494     info->mallocs      = irecv[4];
1495   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1496   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1497   info->fill_ratio_needed = 0;
1498   info->factor_mallocs    = 0;
1499   PetscFunctionReturn(PETSC_SUCCESS);
1500 }
1501 
1502 static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1503 {
1504   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1505   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1506 
1507   PetscFunctionBegin;
1508   switch (op) {
1509   case MAT_NEW_NONZERO_LOCATIONS:
1510   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1511   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1512   case MAT_KEEP_NONZERO_PATTERN:
1513   case MAT_SUBMAT_SINGLEIS:
1514   case MAT_NEW_NONZERO_LOCATION_ERR:
1515     MatCheckPreallocated(A, 1);
1516     PetscCall(MatSetOption(a->A, op, flg));
1517     PetscCall(MatSetOption(a->B, op, flg));
1518     break;
1519   case MAT_ROW_ORIENTED:
1520     MatCheckPreallocated(A, 1);
1521     a->roworiented = flg;
1522 
1523     PetscCall(MatSetOption(a->A, op, flg));
1524     PetscCall(MatSetOption(a->B, op, flg));
1525     break;
1526   case MAT_FORCE_DIAGONAL_ENTRIES:
1527   case MAT_SORTED_FULL:
1528     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1529     break;
1530   case MAT_IGNORE_OFF_PROC_ENTRIES:
1531     a->donotstash = flg;
1532     break;
1533   case MAT_USE_HASH_TABLE:
1534     a->ht_flag = flg;
1535     break;
1536   case MAT_HERMITIAN:
1537     MatCheckPreallocated(A, 1);
1538     PetscCall(MatSetOption(a->A, op, flg));
1539 #if defined(PETSC_USE_COMPLEX)
1540     if (flg) { /* need different mat-vec ops */
1541       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1542       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1543       A->ops->multtranspose    = NULL;
1544       A->ops->multtransposeadd = NULL;
1545       A->symmetric             = PETSC_BOOL3_FALSE;
1546     }
1547 #endif
1548     break;
1549   case MAT_SPD:
1550   case MAT_SYMMETRIC:
1551     MatCheckPreallocated(A, 1);
1552     PetscCall(MatSetOption(a->A, op, flg));
1553 #if defined(PETSC_USE_COMPLEX)
1554     if (flg) { /* restore to use default mat-vec ops */
1555       A->ops->mult             = MatMult_MPISBAIJ;
1556       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1557       A->ops->multtranspose    = MatMult_MPISBAIJ;
1558       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1559     }
1560 #endif
1561     break;
1562   case MAT_STRUCTURALLY_SYMMETRIC:
1563     MatCheckPreallocated(A, 1);
1564     PetscCall(MatSetOption(a->A, op, flg));
1565     break;
1566   case MAT_SYMMETRY_ETERNAL:
1567   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1568     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
1569     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1570     break;
1571   case MAT_SPD_ETERNAL:
1572     break;
1573   case MAT_IGNORE_LOWER_TRIANGULAR:
1574     aA->ignore_ltriangular = flg;
1575     break;
1576   case MAT_ERROR_LOWER_TRIANGULAR:
1577     aA->ignore_ltriangular = flg;
1578     break;
1579   case MAT_GETROW_UPPERTRIANGULAR:
1580     aA->getrow_utriangular = flg;
1581     break;
1582   default:
1583     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1584   }
1585   PetscFunctionReturn(PETSC_SUCCESS);
1586 }
1587 
1588 static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1589 {
1590   PetscFunctionBegin;
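  /* the matrix is symmetric, so its transpose is the matrix itself: duplicate or copy A */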
1591   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1592   if (reuse == MAT_INITIAL_MATRIX) {
1593     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1594   } else if (reuse == MAT_REUSE_MATRIX) {
1595     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1596   }
1597   PetscFunctionReturn(PETSC_SUCCESS);
1598 }
1599 
1600 static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1601 {
1602   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1603   Mat           a = baij->A, b = baij->B;
1604   PetscInt      nv, m, n;
1605   PetscBool     flg;
1606 
1607   PetscFunctionBegin;
1608   if (ll != rr) {
1609     PetscCall(VecEqual(ll, rr, &flg));
1610     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
1611   }
1612   if (!ll) PetscFunctionReturn(PETSC_SUCCESS);
1613 
1614   PetscCall(MatGetLocalSize(mat, &m, &n));
1615   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);
1616 
1617   PetscCall(VecGetLocalSize(rr, &nv));
1618   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
1619 
1620   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1621 
1622   /* left diagonalscale the off-diagonal part */
1623   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1624 
1625   /* scale the diagonal part */
1626   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1627 
1628   /* right diagonalscale the off-diagonal part */
1629   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1630   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1631   PetscFunctionReturn(PETSC_SUCCESS);
1632 }
1633 
1634 static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1635 {
1636   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1637 
1638   PetscFunctionBegin;
1639   PetscCall(MatSetUnfactored(a->A));
1640   PetscFunctionReturn(PETSC_SUCCESS);
1641 }
1642 
1643 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1644 
1645 static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1646 {
1647   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1648   Mat           a, b, c, d;
1649   PetscBool     flg;
1650 
1651   PetscFunctionBegin;
1652   a = matA->A;
1653   b = matA->B;
1654   c = matB->A;
1655   d = matB->B;
1656 
1657   PetscCall(MatEqual(a, c, &flg));
1658   if (flg) PetscCall(MatEqual(b, d, &flg));
1659   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1660   PetscFunctionReturn(PETSC_SUCCESS);
1661 }
1662 
1663 static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1664 {
1665   PetscBool isbaij;
1666 
1667   PetscFunctionBegin;
1668   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1669   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1670   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1671   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1672     PetscCall(MatGetRowUpperTriangular(A));
1673     PetscCall(MatCopy_Basic(A, B, str));
1674     PetscCall(MatRestoreRowUpperTriangular(A));
1675   } else {
1676     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1677     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1678 
1679     PetscCall(MatCopy(a->A, b->A, str));
1680     PetscCall(MatCopy(a->B, b->B, str));
1681   }
1682   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1683   PetscFunctionReturn(PETSC_SUCCESS);
1684 }
1685 
1686 static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1687 {
1688   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
1689   PetscBLASInt  bnz, one                          = 1;
1690   Mat_SeqSBAIJ *xa, *ya;
1691   Mat_SeqBAIJ  *xb, *yb;
1692 
1693   PetscFunctionBegin;
1694   if (str == SAME_NONZERO_PATTERN) {
1695     PetscScalar alpha = a;
1696     xa                = (Mat_SeqSBAIJ *)xx->A->data;
1697     ya                = (Mat_SeqSBAIJ *)yy->A->data;
1698     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1699     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
1700     xb = (Mat_SeqBAIJ *)xx->B->data;
1701     yb = (Mat_SeqBAIJ *)yy->B->data;
1702     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1703     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
1704     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1705   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1706     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
1707     PetscCall(MatAXPY_Basic(Y, a, X, str));
1708     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
1709   } else {
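    /* different nonzero pattern: build a fresh matrix preallocated for the union of the two
       patterns, compute Y + a*X into it, then replace Y's internals with it via MatHeaderMerge() */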
1710     Mat       B;
1711     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1712     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
1713     PetscCall(MatGetRowUpperTriangular(X));
1714     PetscCall(MatGetRowUpperTriangular(Y));
1715     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1716     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1717     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1718     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1719     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1720     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1721     PetscCall(MatSetType(B, MATMPISBAIJ));
1722     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
1723     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1724     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1725     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1726     PetscCall(MatHeaderMerge(Y, &B));
1727     PetscCall(PetscFree(nnz_d));
1728     PetscCall(PetscFree(nnz_o));
1729     PetscCall(MatRestoreRowUpperTriangular(X));
1730     PetscCall(MatRestoreRowUpperTriangular(Y));
1731   }
1732   PetscFunctionReturn(PETSC_SUCCESS);
1733 }
1734 
1735 static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1736 {
1737   PetscInt  i;
1738   PetscBool flg;
1739 
1740   PetscFunctionBegin;
1741   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1742   for (i = 0; i < n; i++) {
1743     PetscCall(ISEqual(irow[i], icol[i], &flg));
1744     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1745   }
1746   PetscFunctionReturn(PETSC_SUCCESS);
1747 }
1748 
1749 static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1750 {
1751   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1752   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
1753 
1754   PetscFunctionBegin;
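  /* make sure the diagonal block is preallocated (one block per block row) so MatShift_Basic()
     has room to insert the diagonal entries */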
1755   if (!Y->preallocated) {
1756     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
1757   } else if (!aij->nz) {
1758     PetscInt nonew = aij->nonew;
1759     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1760     aij->nonew = nonew;
1761   }
1762   PetscCall(MatShift_Basic(Y, a));
1763   PetscFunctionReturn(PETSC_SUCCESS);
1764 }
1765 
1766 static PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1767 {
1768   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1769 
1770   PetscFunctionBegin;
1771   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
1772   PetscCall(MatMissingDiagonal(a->A, missing, d));
1773   if (d) {
1774     PetscInt rstart;
1775     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
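    /* translate the block-row index reported for the local diagonal block into a global block-row index */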
1776     *d += rstart / A->rmap->bs;
1777   }
1778   PetscFunctionReturn(PETSC_SUCCESS);
1779 }
1780 
1781 static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1782 {
1783   PetscFunctionBegin;
1784   *a = ((Mat_MPISBAIJ *)A->data)->A;
1785   PetscFunctionReturn(PETSC_SUCCESS);
1786 }
1787 
1788 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep)
1789 {
1790   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1791 
1792   PetscFunctionBegin;
1793   PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep));       // possibly keep zero diagonal coefficients
1794   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
1795   PetscFunctionReturn(PETSC_SUCCESS);
1796 }
1797 
1798 static PetscErrorCode MatLoad_MPISBAIJ(Mat, PetscViewer);
1799 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat, Vec, PetscInt[]);
1800 static PetscErrorCode MatSOR_MPISBAIJ(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec);
1801 
1802 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1803                                        MatGetRow_MPISBAIJ,
1804                                        MatRestoreRow_MPISBAIJ,
1805                                        MatMult_MPISBAIJ,
1806                                        /*  4*/ MatMultAdd_MPISBAIJ,
1807                                        MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1808                                        MatMultAdd_MPISBAIJ,
1809                                        NULL,
1810                                        NULL,
1811                                        NULL,
1812                                        /* 10*/ NULL,
1813                                        NULL,
1814                                        NULL,
1815                                        MatSOR_MPISBAIJ,
1816                                        MatTranspose_MPISBAIJ,
1817                                        /* 15*/ MatGetInfo_MPISBAIJ,
1818                                        MatEqual_MPISBAIJ,
1819                                        MatGetDiagonal_MPISBAIJ,
1820                                        MatDiagonalScale_MPISBAIJ,
1821                                        MatNorm_MPISBAIJ,
1822                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1823                                        MatAssemblyEnd_MPISBAIJ,
1824                                        MatSetOption_MPISBAIJ,
1825                                        MatZeroEntries_MPISBAIJ,
1826                                        /* 24*/ NULL,
1827                                        NULL,
1828                                        NULL,
1829                                        NULL,
1830                                        NULL,
1831                                        /* 29*/ MatSetUp_MPI_Hash,
1832                                        NULL,
1833                                        NULL,
1834                                        MatGetDiagonalBlock_MPISBAIJ,
1835                                        NULL,
1836                                        /* 34*/ MatDuplicate_MPISBAIJ,
1837                                        NULL,
1838                                        NULL,
1839                                        NULL,
1840                                        NULL,
1841                                        /* 39*/ MatAXPY_MPISBAIJ,
1842                                        MatCreateSubMatrices_MPISBAIJ,
1843                                        MatIncreaseOverlap_MPISBAIJ,
1844                                        MatGetValues_MPISBAIJ,
1845                                        MatCopy_MPISBAIJ,
1846                                        /* 44*/ NULL,
1847                                        MatScale_MPISBAIJ,
1848                                        MatShift_MPISBAIJ,
1849                                        NULL,
1850                                        NULL,
1851                                        /* 49*/ NULL,
1852                                        NULL,
1853                                        NULL,
1854                                        NULL,
1855                                        NULL,
1856                                        /* 54*/ NULL,
1857                                        NULL,
1858                                        MatSetUnfactored_MPISBAIJ,
1859                                        NULL,
1860                                        MatSetValuesBlocked_MPISBAIJ,
1861                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1862                                        NULL,
1863                                        NULL,
1864                                        NULL,
1865                                        NULL,
1866                                        /* 64*/ NULL,
1867                                        NULL,
1868                                        NULL,
1869                                        NULL,
1870                                        NULL,
1871                                        /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1872                                        NULL,
1873                                        MatConvert_MPISBAIJ_Basic,
1874                                        NULL,
1875                                        NULL,
1876                                        /* 74*/ NULL,
1877                                        NULL,
1878                                        NULL,
1879                                        NULL,
1880                                        NULL,
1881                                        /* 79*/ NULL,
1882                                        NULL,
1883                                        NULL,
1884                                        NULL,
1885                                        MatLoad_MPISBAIJ,
1886                                        /* 84*/ NULL,
1887                                        NULL,
1888                                        NULL,
1889                                        NULL,
1890                                        NULL,
1891                                        /* 89*/ NULL,
1892                                        NULL,
1893                                        NULL,
1894                                        NULL,
1895                                        NULL,
1896                                        /* 94*/ NULL,
1897                                        NULL,
1898                                        NULL,
1899                                        NULL,
1900                                        NULL,
1901                                        /* 99*/ NULL,
1902                                        NULL,
1903                                        NULL,
1904                                        MatConjugate_MPISBAIJ,
1905                                        NULL,
1906                                        /*104*/ NULL,
1907                                        MatRealPart_MPISBAIJ,
1908                                        MatImaginaryPart_MPISBAIJ,
1909                                        MatGetRowUpperTriangular_MPISBAIJ,
1910                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1911                                        /*109*/ NULL,
1912                                        NULL,
1913                                        NULL,
1914                                        NULL,
1915                                        MatMissingDiagonal_MPISBAIJ,
1916                                        /*114*/ NULL,
1917                                        NULL,
1918                                        NULL,
1919                                        NULL,
1920                                        NULL,
1921                                        /*119*/ NULL,
1922                                        NULL,
1923                                        NULL,
1924                                        NULL,
1925                                        NULL,
1926                                        /*124*/ NULL,
1927                                        NULL,
1928                                        NULL,
1929                                        NULL,
1930                                        NULL,
1931                                        /*129*/ NULL,
1932                                        NULL,
1933                                        NULL,
1934                                        NULL,
1935                                        NULL,
1936                                        /*134*/ NULL,
1937                                        NULL,
1938                                        NULL,
1939                                        NULL,
1940                                        NULL,
1941                                        /*139*/ MatSetBlockSizes_Default,
1942                                        NULL,
1943                                        NULL,
1944                                        NULL,
1945                                        NULL,
1946                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1947                                        NULL,
1948                                        NULL,
1949                                        NULL,
1950                                        NULL,
1951                                        NULL,
1952                                        /*150*/ NULL,
1953                                        MatEliminateZeros_MPISBAIJ,
1954                                        NULL,
1955                                        NULL,
1956                                        NULL,
1957                                        /*155*/ NULL,
1958                                        MatCopyHashToXAIJ_MPI_Hash};
1959 
1960 static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1961 {
1962   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1963   PetscInt      i, mbs, Mbs;
1964   PetscMPIInt   size;
1965 
1966   PetscFunctionBegin;
1967   if (B->hash_active) {
1968     B->ops[0]      = b->cops;
1969     B->hash_active = PETSC_FALSE;
1970   }
1971   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1972   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
1973   PetscCall(PetscLayoutSetUp(B->rmap));
1974   PetscCall(PetscLayoutSetUp(B->cmap));
1975   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1976   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1977   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1978 
1979   mbs = B->rmap->n / bs;
1980   Mbs = B->rmap->N / bs;
1981   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->n, bs);
1982 
1983   B->rmap->bs = bs;
1984   b->bs2      = bs * bs;
1985   b->mbs      = mbs;
1986   b->Mbs      = Mbs;
1987   b->nbs      = B->cmap->n / bs;
1988   b->Nbs      = B->cmap->N / bs;
1989 
1990   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1991   b->rstartbs = B->rmap->rstart / bs;
1992   b->rendbs   = B->rmap->rend / bs;
1993 
1994   b->cstartbs = B->cmap->rstart / bs;
1995   b->cendbs   = B->cmap->rend / bs;
1996 
1997 #if defined(PETSC_USE_CTABLE)
1998   PetscCall(PetscHMapIDestroy(&b->colmap));
1999 #else
2000   PetscCall(PetscFree(b->colmap));
2001 #endif
2002   PetscCall(PetscFree(b->garray));
2003   PetscCall(VecDestroy(&b->lvec));
2004   PetscCall(VecScatterDestroy(&b->Mvctx));
2005   PetscCall(VecDestroy(&b->slvec0));
2006   PetscCall(VecDestroy(&b->slvec0b));
2007   PetscCall(VecDestroy(&b->slvec1));
2008   PetscCall(VecDestroy(&b->slvec1a));
2009   PetscCall(VecDestroy(&b->slvec1b));
2010   PetscCall(VecScatterDestroy(&b->sMvctx));
2011 
2012   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2013 
2014   MatSeqXAIJGetOptions_Private(b->B);
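  /* The off-diagonal part b->B is created with the full global column width when more than one
     process is used; its column space is compacted to the referenced columns (recorded in
     garray) during assembly */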
2015   PetscCall(MatDestroy(&b->B));
2016   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2017   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2018   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2019   MatSeqXAIJRestoreOptions_Private(b->B);
2020 
2021   MatSeqXAIJGetOptions_Private(b->A);
2022   PetscCall(MatDestroy(&b->A));
2023   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2024   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2025   PetscCall(MatSetType(b->A, MATSEQSBAIJ));
2026   MatSeqXAIJRestoreOptions_Private(b->A);
2027 
2028   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2029   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2030 
2031   B->preallocated  = PETSC_TRUE;
2032   B->was_assembled = PETSC_FALSE;
2033   B->assembled     = PETSC_FALSE;
2034   PetscFunctionReturn(PETSC_SUCCESS);
2035 }
2036 
2037 static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2038 {
2039   PetscInt        m, rstart, cend;
2040   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2041   const PetscInt *JJ          = NULL;
2042   PetscScalar    *values      = NULL;
2043   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
2044   PetscBool       nooffprocentries;
2045 
2046   PetscFunctionBegin;
2047   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
2048   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2049   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2050   PetscCall(PetscLayoutSetUp(B->rmap));
2051   PetscCall(PetscLayoutSetUp(B->cmap));
2052   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2053   m      = B->rmap->n / bs;
2054   rstart = B->rmap->rstart / bs;
2055   cend   = B->cmap->rend / bs;
2056 
2057   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2058   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2059   for (i = 0; i < m; i++) {
2060     nz = ii[i + 1] - ii[i];
2061     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2062     /* count the ones on the diagonal and above, split into diagonal and off-diagonal portions. */
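    /* bd counts entries strictly below the diagonal (lower triangle, not stored), d counts
       entries from the diagonal through the end of this process's diagonal block, and the
       remaining nz - d - bd entries belong to the off-diagonal block */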
2063     JJ = jj + ii[i];
2064     bd = 0;
2065     for (j = 0; j < nz; j++) {
2066       if (*JJ >= i + rstart) break;
2067       JJ++;
2068       bd++;
2069     }
2070     d = 0;
2071     for (; j < nz; j++) {
2072       if (*JJ++ >= cend) break;
2073       d++;
2074     }
2075     d_nnz[i] = d;
2076     o_nnz[i] = nz - d - bd;
2077     nz       = nz - bd;
2078     nz_max   = PetscMax(nz_max, nz);
2079   }
2080   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2081   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
2082   PetscCall(PetscFree2(d_nnz, o_nnz));
2083 
2084   values = (PetscScalar *)V;
2085   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2086   for (i = 0; i < m; i++) {
2087     PetscInt        row   = i + rstart;
2088     PetscInt        ncols = ii[i + 1] - ii[i];
2089     const PetscInt *icols = jj + ii[i];
2090     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2091       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2092       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2093     } else { /* block ordering does not match so we can only insert one block at a time. */
2094       PetscInt j;
2095       for (j = 0; j < ncols; j++) {
2096         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2097         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2098       }
2099     }
2100   }
2101 
2102   if (!V) PetscCall(PetscFree(values));
2103   nooffprocentries    = B->nooffprocentries;
2104   B->nooffprocentries = PETSC_TRUE;
2105   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2106   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2107   B->nooffprocentries = nooffprocentries;
2108 
2109   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2110   PetscFunctionReturn(PETSC_SUCCESS);
2111 }
2112 
2113 /*MC
2114    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2115    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2116    the matrix is stored.
2117 
2118    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2119    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
2120 
2121    Options Database Key:
2122 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2123 
2124    Level: beginner
2125 
2126    Note:
2127      The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
2128      diagonal portion of the matrix on each process has to be less than or equal to the number of columns.
2129 
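   Example Usage:
   A minimal sketch; `comm`, `M` (divisible by `bs`), `bs`, `d_nz`, and `o_nz` are placeholder values:
.vb
   Mat A;
   MatCreate(comm, &A);
   MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, M);
   MatSetType(A, MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A, bs, d_nz, NULL, o_nz, NULL);
   /* insert entries in the upper triangle of locally owned rows, then assemble */
   MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
.ve
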
2130 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2131 M*/
2132 
2133 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2134 {
2135   Mat_MPISBAIJ *b;
2136   PetscBool     flg = PETSC_FALSE;
2137 
2138   PetscFunctionBegin;
2139   PetscCall(PetscNew(&b));
2140   B->data   = (void *)b;
2141   B->ops[0] = MatOps_Values;
2142 
2143   B->ops->destroy = MatDestroy_MPISBAIJ;
2144   B->ops->view    = MatView_MPISBAIJ;
2145   B->assembled    = PETSC_FALSE;
2146   B->insertmode   = NOT_SET_VALUES;
2147 
2148   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2149   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2150 
2151   /* build local table of row and column ownerships */
2152   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2153 
2154   /* build cache for off array entries formed */
2155   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2156 
2157   b->donotstash  = PETSC_FALSE;
2158   b->colmap      = NULL;
2159   b->garray      = NULL;
2160   b->roworiented = PETSC_TRUE;
2161 
2162   /* stuff used in block assembly */
2163   b->barray = NULL;
2164 
2165   /* stuff used for matrix vector multiply */
2166   b->lvec    = NULL;
2167   b->Mvctx   = NULL;
2168   b->slvec0  = NULL;
2169   b->slvec0b = NULL;
2170   b->slvec1  = NULL;
2171   b->slvec1a = NULL;
2172   b->slvec1b = NULL;
2173   b->sMvctx  = NULL;
2174 
2175   /* stuff for MatGetRow() */
2176   b->rowindices   = NULL;
2177   b->rowvalues    = NULL;
2178   b->getrowactive = PETSC_FALSE;
2179 
2180   /* hash table stuff */
2181   b->ht           = NULL;
2182   b->hd           = NULL;
2183   b->ht_size      = 0;
2184   b->ht_flag      = PETSC_FALSE;
2185   b->ht_fact      = 0;
2186   b->ht_total_ct  = 0;
2187   b->ht_insert_ct = 0;
2188 
2189   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2190   b->ijonly = PETSC_FALSE;
2191 
2192   b->in_loc = NULL;
2193   b->v_loc  = NULL;
2194   b->n_loc  = 0;
2195 
2196   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
2197   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
2198   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
2199   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2200 #if defined(PETSC_HAVE_ELEMENTAL)
2201   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
2202 #endif
2203 #if defined(PETSC_HAVE_SCALAPACK)
2204   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2205 #endif
2206   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
2207   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2208 
2209   B->symmetric                   = PETSC_BOOL3_TRUE;
2210   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2211   B->symmetry_eternal            = PETSC_TRUE;
2212   B->structural_symmetry_eternal = PETSC_TRUE;
2213 #if defined(PETSC_USE_COMPLEX)
2214   B->hermitian = PETSC_BOOL3_FALSE;
2215 #else
2216   B->hermitian = PETSC_BOOL3_TRUE;
2217 #endif
2218 
2219   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2220   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix", "Mat");
2221   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
2222   if (flg) {
2223     PetscReal fact = 1.39;
2224     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2225     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2226     if (fact <= 1.0) fact = 1.39;
2227     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2228     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2229   }
2230   PetscOptionsEnd();
2231   PetscFunctionReturn(PETSC_SUCCESS);
2232 }
2233 
2234 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2235 /*MC
2236    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2237 
2238    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2239    and `MATMPISBAIJ` otherwise.
2240 
2241    Options Database Key:
2242 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2243 
2244   Level: beginner
2245 
2246 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2247 M*/
2248 
2249 /*@
2250   MatMPISBAIJSetPreallocation - For good matrix assembly performance
2251   the user should preallocate the matrix storage by setting the parameters
2252   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2253   performance can be increased by more than a factor of 50.
2254 
2255   Collective
2256 
2257   Input Parameters:
2258 + B     - the matrix
2259 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2260           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2261 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2262           submatrix  (same for all local rows)
2263 . d_nnz - array containing the number of block nonzeros in the various block rows
2264           in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2265           (possibly different for each block row) or `NULL`.  If you plan to factor the matrix you must leave room
2266           for the diagonal entry and set a value even if it is zero.
2267 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2268           submatrix (same for all local rows).
2269 - o_nnz - array containing the number of nonzeros in the various block rows of the
2270           off-diagonal portion of the local submatrix that is right of the diagonal
2271           (possibly different for each block row) or `NULL`.
2272 
2273   Options Database Keys:
2274 + -mat_no_unroll  - uses code that does not unroll the loops in the
2275                     block calculations (much slower)
2276 - -mat_block_size - size of the blocks to use
2277 
2278   Level: intermediate
2279 
2280   Notes:
2281 
2282   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2283   then it must be used on all processors that share the object for that argument.
2284 
2285   If the *_nnz parameter is given then the *_nz parameter is ignored
2286 
2287   Storage Information:
2288   For a square global matrix we define each processor's diagonal portion
2289   to be its local rows and the corresponding columns (a square submatrix);
2290   each processor's off-diagonal portion encompasses the remainder of the
2291   local matrix (a rectangular submatrix).
2292 
2293   The user can specify preallocated storage for the diagonal part of
2294   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
2295   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2296   memory allocation.  Likewise, specify preallocated storage for the
2297   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2298 
2299   You can call `MatGetInfo()` to get information on how effective the preallocation was;
2300   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
2301   You can also run with the option `-info` and look for messages with the string
2302   malloc in them to see if additional memory allocation was needed.
2303 
2304   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2305   the figure below we depict these three local rows and all columns (0-11).
2306 
2307 .vb
2308            0 1 2 3 4 5 6 7 8 9 10 11
2309           --------------------------
2310    row 3  |. . . d d d o o o o  o  o
2311    row 4  |. . . d d d o o o o  o  o
2312    row 5  |. . . d d d o o o o  o  o
2313           --------------------------
2314 .ve
2315 
2316   Thus, any entries in the d locations are stored in the d (diagonal)
2317   submatrix, and any entries in the o locations are stored in the
2318   o (off-diagonal) submatrix.  Note that the d matrix is stored in
2319   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2320 
2321   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2322   plus the diagonal part of the d matrix,
2323   and `o_nz` should indicate the number of block nonzeros per row in the o matrix
2324 
2325   In general, for PDE problems in which most nonzeros are near the diagonal,
2326   one expects `d_nz` >> `o_nz`.
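
  For the three rows depicted above with bs = 1, the stored upper-triangular d entries
  number 3 for row 3, 2 for row 4, and 1 for row 5, while each row has 6 o entries;
  exact preallocation would therefore be d_nnz = {3, 2, 1} and o_nz = 6.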
2327 
2328 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2329 @*/
2330 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2331 {
2332   PetscFunctionBegin;
2333   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2334   PetscValidType(B, 1);
2335   PetscValidLogicalCollectiveInt(B, bs, 2);
2336   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2337   PetscFunctionReturn(PETSC_SUCCESS);
2338 }
2339 
2340 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2341 /*@
2342   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2343   (block compressed row).  For good matrix assembly performance
2344   the user should preallocate the matrix storage by setting the parameters
2345   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
2346 
2347   Collective
2348 
2349   Input Parameters:
2350 + comm  - MPI communicator
2351 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2352           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2353 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
2354           This value should be the same as the local size used in creating the
2355           y vector for the matrix-vector product y = Ax.
2356 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
2357           This value should be the same as the local size used in creating the
2358           x vector for the matrix-vector product y = Ax.
2359 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2360 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2361 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2362           submatrix (same for all local rows)
2363 . d_nnz - array containing the number of block nonzeros in the various block rows
2364           in the upper triangular portion of the diagonal portion of the local submatrix
2365           (possibly different for each block row) or `NULL`.
2366           If you plan to factor the matrix you must leave room for the diagonal entry and
2367           set its value even if it is zero.
2368 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2369           submatrix (same for all local rows).
2370 - o_nnz - array containing the number of block nonzeros in the various block rows of the
2371           off-diagonal portion of the local submatrix (possibly different for
2372           each block row) or `NULL`.
2373 
2374   Output Parameter:
2375 . A - the matrix
2376 
2377   Options Database Keys:
2378 + -mat_no_unroll  - uses code that does not unroll the loops in the
2379                     block calculations (much slower)
2380 . -mat_block_size - size of the blocks to use
2381 - -mat_mpi        - use the parallel matrix data structures even on one processor
2382                     (defaults to using SeqBAIJ format on one processor)
2383 
2384   Level: intermediate
2385 
2386   Notes:
2387   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2388   MatXXXXSetPreallocation() paradigm instead of this routine directly.
2389   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2390 
2391   The number of rows and columns must be divisible by blocksize.
2392   This matrix type does not support complex Hermitian operations.
2393 
2394   The user MUST specify either the local or global matrix dimensions
2395   (possibly both).
2396 
2397   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2398   then it must be used on all processors that share the object for that argument.
2399 
2400   If `m` and `n` are not `PETSC_DECIDE`, then the values determines the `PetscLayout` of the matrix and the ranges returned by
2401   `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
2402 
2403   If the *_nnz parameter is given then the *_nz parameter is ignored
2404 
2405   Storage Information:
2406   For a square global matrix we define each processor's diagonal portion
2407   to be its local rows and the corresponding columns (a square submatrix);
2408   each processor's off-diagonal portion encompasses the remainder of the
2409   local matrix (a rectangular submatrix).
2410 
2411   The user can specify preallocated storage for the diagonal part of
2412   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
2413   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2414   memory allocation. Likewise, specify preallocated storage for the
2415   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2416 
2417   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2418   the figure below we depict these three local rows and all columns (0-11).
2419 
2420 .vb
2421            0 1 2 3 4 5 6 7 8 9 10 11
2422           --------------------------
2423    row 3  |. . . d d d o o o o  o  o
2424    row 4  |. . . d d d o o o o  o  o
2425    row 5  |. . . d d d o o o o  o  o
2426           --------------------------
2427 .ve
2428 
2429   Thus, any entries in the d locations are stored in the d (diagonal)
2430   submatrix, and any entries in the o locations are stored in the
2431   o (off-diagonal) submatrix. Note that the d matrix is stored in
2432   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2433 
2434   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2435   plus the diagonal part of the d matrix,
2436   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2437   In general, for PDE problems in which most nonzeros are near the diagonal,
2438   one expects `d_nz` >> `o_nz`.
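
  Example Usage:
  A minimal sketch; `comm`, `bs`, and `M` (divisible by `bs`) are placeholder values, and the
  counts 3 and 2 are illustrative per-block-row estimates:
.vb
   Mat A;
   MatCreateSBAIJ(comm, bs, PETSC_DECIDE, PETSC_DECIDE, M, M, 3, NULL, 2, NULL, &A);
   /* insert entries in the upper triangle of locally owned rows, then assemble */
.ve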
2439 
2440 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`,
2441           `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
2442 @*/
2443 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2444 {
2445   PetscMPIInt size;
2446 
2447   PetscFunctionBegin;
2448   PetscCall(MatCreate(comm, A));
2449   PetscCall(MatSetSizes(*A, m, n, M, N));
2450   PetscCallMPI(MPI_Comm_size(comm, &size));
2451   if (size > 1) {
2452     PetscCall(MatSetType(*A, MATMPISBAIJ));
2453     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2454   } else {
2455     PetscCall(MatSetType(*A, MATSEQSBAIJ));
2456     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2457   }
2458   PetscFunctionReturn(PETSC_SUCCESS);
2459 }
2460 
2461 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2462 {
2463   Mat           mat;
2464   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2465   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2466   PetscScalar  *array;
2467 
2468   PetscFunctionBegin;
2469   *newmat = NULL;
2470 
2471   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2472   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2473   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2474   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2475   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2476 
2477   if (matin->hash_active) {
2478     PetscCall(MatSetUp(mat));
2479   } else {
2480     mat->factortype   = matin->factortype;
2481     mat->preallocated = PETSC_TRUE;
2482     mat->assembled    = PETSC_TRUE;
2483     mat->insertmode   = NOT_SET_VALUES;
2484 
2485     a      = (Mat_MPISBAIJ *)mat->data;
2486     a->bs2 = oldmat->bs2;
2487     a->mbs = oldmat->mbs;
2488     a->nbs = oldmat->nbs;
2489     a->Mbs = oldmat->Mbs;
2490     a->Nbs = oldmat->Nbs;
2491 
2492     a->size         = oldmat->size;
2493     a->rank         = oldmat->rank;
2494     a->donotstash   = oldmat->donotstash;
2495     a->roworiented  = oldmat->roworiented;
2496     a->rowindices   = NULL;
2497     a->rowvalues    = NULL;
2498     a->getrowactive = PETSC_FALSE;
2499     a->barray       = NULL;
2500     a->rstartbs     = oldmat->rstartbs;
2501     a->rendbs       = oldmat->rendbs;
2502     a->cstartbs     = oldmat->cstartbs;
2503     a->cendbs       = oldmat->cendbs;
2504 
2505     /* hash table stuff */
2506     a->ht           = NULL;
2507     a->hd           = NULL;
2508     a->ht_size      = 0;
2509     a->ht_flag      = oldmat->ht_flag;
2510     a->ht_fact      = oldmat->ht_fact;
2511     a->ht_total_ct  = 0;
2512     a->ht_insert_ct = 0;
2513 
2514     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2515     if (oldmat->colmap) {
2516 #if defined(PETSC_USE_CTABLE)
2517       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2518 #else
2519       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
2520       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2521 #endif
2522     } else a->colmap = NULL;
2523 
2524     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
2525       PetscCall(PetscMalloc1(len, &a->garray));
2526       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2527     } else a->garray = NULL;
2528 
2529     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
2530     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
2531     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
2532 
2533     PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
2534     PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2535 
2536     PetscCall(VecGetLocalSize(a->slvec1, &nt));
2537     PetscCall(VecGetArray(a->slvec1, &array));
2538     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
2539     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
2540     PetscCall(VecRestoreArray(a->slvec1, &array));
2541     PetscCall(VecGetArray(a->slvec0, &array));
2542     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
2543     PetscCall(VecRestoreArray(a->slvec0, &array));
2544 
2545     /* PetscCall(VecScatterCopy(oldmat->sMvctx, &a->sMvctx)); - not written yet, replaced by the lazy trick: */
2546     PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2547     a->sMvctx = oldmat->sMvctx;
2548 
2549     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2550     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
2551   }
2552   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2553   *newmat = mat;
2554   PetscFunctionReturn(PETSC_SUCCESS);
2555 }
2556 
2557 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2558 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2559 
2560 static PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2561 {
2562   PetscBool isbinary;
2563 
2564   PetscFunctionBegin;
2565   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2566   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2567   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2568   PetscFunctionReturn(PETSC_SUCCESS);
2569 }
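
/* A minimal usage sketch for the binary loader above; the filename "matrix.dat" is
   illustrative, and the file must have been written with MatView() on a binary viewer:

     Mat         A;
     PetscViewer viewer;

     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetType(A, MATMPISBAIJ));
     PetscCall(MatLoad(A, viewer));
     PetscCall(PetscViewerDestroy(&viewer));
     PetscCall(MatDestroy(&A));
*/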
2570 
2571 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2572 {
2573   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2574   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)a->B->data;
2575   PetscReal     atmp;
2576   PetscReal    *work, *svalues, *rvalues;
2577   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
2578   PetscMPIInt   rank, size;
2579   PetscInt     *rowners_bs, count, source;
2580   PetscScalar  *va;
2581   MatScalar    *ba;
2582   MPI_Status    stat;
2583 
2584   PetscFunctionBegin;
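  /* Since only the upper triangle is stored, each entry of the off-diagonal block B
     also contributes to the row maximum of a row owned by a later rank. The diagonal
     block is handled by MatGetRowMaxAbs() below; B's contributions are accumulated in
     the global-length work array, each rank sends the segment owned by every later
     rank to that rank, and every rank folds the segments received from earlier ranks
     into its part of v */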
2585   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Returning row indices (idx) is not supported; send email to petsc-maint@mcs.anl.gov");
2586   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
2587   PetscCall(VecGetArray(v, &va));
2588 
2589   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2590   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2591 
2592   bs  = A->rmap->bs;
2593   mbs = a->mbs;
2594   Mbs = a->Mbs;
2595   ba  = b->a;
2596   bi  = b->i;
2597   bj  = b->j;
2598 
2599   /* find ownerships */
2600   rowners_bs = A->rmap->range;
2601 
2602   /* each proc creates an array to be distributed */
2603   PetscCall(PetscCalloc1(bs * Mbs, &work));
2604 
2605   /* row_max for B */
2606   if (rank != size - 1) {
2607     for (i = 0; i < mbs; i++) {
2608       ncols = bi[1] - bi[0];
2609       bi++;
2610       brow = bs * i;
2611       for (j = 0; j < ncols; j++) {
2612         bcol = bs * (*bj);
2613         for (kcol = 0; kcol < bs; kcol++) {
2614           col = bcol + kcol;           /* local col index */
2615           col += rowners_bs[rank + 1]; /* global col index */
2616           for (krow = 0; krow < bs; krow++) {
2617             atmp = PetscAbsScalar(*ba);
2618             ba++;
2619             row = brow + krow; /* local row index */
2620             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2621             if (work[col] < atmp) work[col] = atmp;
2622           }
2623         }
2624         bj++;
2625       }
2626     }
2627 
2628     /* send values to its owners */
2629     for (PetscMPIInt dest = rank + 1; dest < size; dest++) {
2630       svalues = work + rowners_bs[dest];
2631       count   = rowners_bs[dest + 1] - rowners_bs[dest];
2632       PetscCallMPI(MPIU_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2633     }
2634   }
2635 
2636   /* receive values */
2637   if (rank) {
2638     rvalues = work;
2639     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2640     for (source = 0; source < rank; source++) {
2641       PetscCallMPI(MPIU_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2642       /* process values */
2643       for (i = 0; i < count; i++) {
2644         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2645       }
2646     }
2647   }
2648 
2649   PetscCall(VecRestoreArray(v, &va));
2650   PetscCall(PetscFree(work));
2651   PetscFunctionReturn(PETSC_SUCCESS);
2652 }
2653 
2654 static PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2655 {
2656   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2657   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
2658   PetscScalar       *x, *ptr, *from;
2659   Vec                bb1;
2660   const PetscScalar *b;
2661 
2662   PetscFunctionBegin;
2663   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2664   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2665 
2666   if (flag == SOR_APPLY_UPPER) {
2667     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2668     PetscFunctionReturn(PETSC_SUCCESS);
2669   }
2670 
2671   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2672     if (flag & SOR_ZERO_INITIAL_GUESS) {
2673       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2674       its--;
2675     }
2676 
2677     PetscCall(VecDuplicate(bb, &bb1));
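    /* each iteration forms bb1 = bb - (off-diagonal part of A)*xx using the split work
       vectors: slvec0 packs [-xx | -B^T*xx]; the ADD_VALUES forward scatter adds the
       -B^T*xx pieces (the lower-triangle contributions, since only the upper triangle
       is stored) into slvec1a on the owning ranks and delivers the -xx ghost values
       into slvec1b; the multadd below then subtracts the upper-triangle part B*xx_ghost */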
2678     while (its--) {
2679       /* lower triangular part: slvec0b = - B^T*xx */
2680       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2681 
2682       /* copy xx into slvec0a */
2683       PetscCall(VecGetArray(mat->slvec0, &ptr));
2684       PetscCall(VecGetArray(xx, &x));
2685       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
2686       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2687 
2688       PetscCall(VecScale(mat->slvec0, -1.0));
2689 
2690       /* copy bb into slvec1a */
2691       PetscCall(VecGetArray(mat->slvec1, &ptr));
2692       PetscCall(VecGetArrayRead(bb, &b));
2693       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
2694       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2695 
2696       /* set slvec1b = 0 */
2697       PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2698       PetscCall(VecZeroEntries(mat->slvec1b));
2699 
2700       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2701       PetscCall(VecRestoreArray(xx, &x));
2702       PetscCall(VecRestoreArrayRead(bb, &b));
2703       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2704 
2705       /* upper triangular part: bb1 = bb1 - B*x */
2706       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2707 
2708       /* local diagonal sweep */
2709       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2710     }
2711     PetscCall(VecDestroy(&bb1));
2712   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2713     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2714   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2715     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2716   } else if (flag & SOR_EISENSTAT) {
2717     Vec                xx1;
2718     PetscBool          hasop;
2719     const PetscScalar *diag;
2720     PetscScalar       *sl, scale = (omega - 2.0) / omega;
2721     PetscInt           i, n;
2722 
2723     if (!mat->xx1) {
2724       PetscCall(VecDuplicate(bb, &mat->xx1));
2725       PetscCall(VecDuplicate(bb, &mat->bb1));
2726     }
2727     xx1 = mat->xx1;
2728     bb1 = mat->bb1;
2729 
2730     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2731 
2732     if (!mat->diag) {
2733       /* this is wrong for same matrix with new nonzero values */
2734       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
2735       PetscCall(MatGetDiagonal(matin, mat->diag));
2736     }
2737     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2738 
2739     if (hasop) {
2740       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
2741       PetscCall(VecAYPX(mat->slvec1a, scale, bb));
2742     } else {
2743       /*
2744           These two lines are replaced by code that may be a bit faster for a good compiler
2745       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2746       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2747       */
2748       PetscCall(VecGetArray(mat->slvec1a, &sl));
2749       PetscCall(VecGetArrayRead(mat->diag, &diag));
2750       PetscCall(VecGetArrayRead(bb, &b));
2751       PetscCall(VecGetArray(xx, &x));
2752       PetscCall(VecGetLocalSize(xx, &n));
2753       if (omega == 1.0) {
2754         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
2755         PetscCall(PetscLogFlops(2.0 * n));
2756       } else {
2757         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
2758         PetscCall(PetscLogFlops(3.0 * n));
2759       }
2760       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
2761       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
2762       PetscCall(VecRestoreArrayRead(bb, &b));
2763       PetscCall(VecRestoreArray(xx, &x));
2764     }
2765 
2766     /* multiply off-diagonal portion of matrix */
2767     PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2768     PetscCall(VecZeroEntries(mat->slvec1b));
2769     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2770     PetscCall(VecGetArray(mat->slvec0, &from));
2771     PetscCall(VecGetArray(xx, &x));
2772     PetscCall(PetscArraycpy(from, x, bs * mbs));
2773     PetscCall(VecRestoreArray(mat->slvec0, &from));
2774     PetscCall(VecRestoreArray(xx, &x));
2775     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2776     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2777     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2778 
2779     /* local sweep */
2780     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
2781     PetscCall(VecAXPY(xx, 1.0, xx1));
2782   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
2783   PetscFunctionReturn(PETSC_SUCCESS);
2784 }
2785 
2786 /*@
2787   MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain the local rows in standard CSR format.
2788 
2789   Collective
2790 
2791   Input Parameters:
2792 + comm - MPI communicator
2793 . bs   - the block size, only a block size of 1 is supported
2794 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
2795 . n    - the number of local columns; this should be the same as the local size used in creating the
2796          x vector for the matrix-vector product $ y = Ax $ (or `PETSC_DECIDE` to have it
2797          calculated if `N` is given). For square matrices `n` is almost always `m`.
2798 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2799 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2800 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that block row of the matrix
2801 . j    - column indices
2802 - a    - matrix values
2803 
2804   Output Parameter:
2805 . mat - the matrix
2806 
2807   Level: intermediate
2808 
2809   Notes:
2810   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
2811   thus you CANNOT change the matrix entries by changing the values of `a` after you have
2812   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
2813 
2814   The `i` and `j` indices are 0 based, and the `i` indices are offsets into the local `j` array.
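
  Example usage, a sketch for a 4x4 symmetric tridiagonal matrix split over two ranks with a `bs` of 1
  (the arrays shown are those passed on rank 0; rank 1 passes the analogous arrays for rows 2 and 3):
.vb
  PetscInt    i[] = {0, 2, 4};              // rows 0 and 1 each store two upper-triangular entries
  PetscInt    j[] = {0, 1, 1, 2};           // global column indices, sorted within each row
  PetscScalar a[] = {4.0, -1.0, 4.0, -1.0}; // values of the stored upper triangle
  Mat         A;

  PetscCall(MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A));
.ve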
2815 
2816 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2817           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatMPISBAIJSetPreallocationCSR()`
2818 @*/
2819 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2820 {
2821   PetscFunctionBegin;
2822   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
2823   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
2824   PetscCall(MatCreate(comm, mat));
2825   PetscCall(MatSetSizes(*mat, m, n, M, N));
2826   PetscCall(MatSetType(*mat, MATMPISBAIJ));
2827   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
2828   PetscFunctionReturn(PETSC_SUCCESS);
2829 }
2830 
2831 /*@
2832   MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2833 
2834   Collective
2835 
2836   Input Parameters:
2837 + B  - the matrix
2838 . bs - the block size
2839 . i  - the indices into `j` for the start of each local row (indices start with zero)
2840 . j  - the column indices for each local row (indices start with zero) these must be sorted for each row
2841 - v  - optional values in the matrix, pass `NULL` if not provided
2842 
2843   Level: advanced
2844 
2845   Notes:
2846   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2847   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2848   called this routine.
2849 
2850   Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries,
2851   and usually the numerical values as well.
2852 
2853   Any entries passed in that are below the diagonal are ignored.
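
  Example usage, a sketch on one rank with a `bs` of 1 for a 3x3 symmetric tridiagonal matrix `B`
  (for a larger `bs` the `i` and `j` arrays index block rows and block columns instead):
.vb
  PetscInt    i[] = {0, 2, 4, 5};                // offsets into j for each of the 3 local rows
  PetscInt    j[] = {0, 1, 1, 2, 2};             // upper-triangular column indices, sorted per row
  PetscScalar v[] = {4.0, -1.0, 4.0, -1.0, 4.0}; // optional numerical values

  PetscCall(MatMPISBAIJSetPreallocationCSR(B, 1, i, j, v));
.ve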
2854 
2855 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
2856           `MatCreateMPISBAIJWithArrays()`
2857 @*/
2858 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2859 {
2860   PetscFunctionBegin;
2861   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2862   PetscFunctionReturn(PETSC_SUCCESS);
2863 }
2864 
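/* Concatenates the sequential SBAIJ matrix supplied by each rank, row-wise, into one
   parallel SBAIJ matrix: a symbolic phase computes the block preallocation of the
   result, then a numeric phase copies the (upper-triangular) rows over */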
2865 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2866 {
2867   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
2868   PetscInt    *indx;
2869   PetscScalar *values;
2870 
2871   PetscFunctionBegin;
2872   PetscCall(MatGetSize(inmat, &m, &N));
2873   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2874     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2875     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
2876     PetscInt     *bindx, rmax = a->rmax, j;
2877     PetscMPIInt   rank, size;
2878 
2879     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2880     mbs = m / bs;
2881     Nbs = N / cbs;
2882     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2883     nbs = n / cbs;
2884 
2885     PetscCall(PetscMalloc1(rmax, &bindx));
2886     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2887 
2888     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2889     PetscCallMPI(MPI_Comm_size(comm, &size));
2890     if (rank == size - 1) {
2891       /* Check sum(nbs) = Nbs */
2892       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2893     }
2894 
2895     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2896     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2897     for (i = 0; i < mbs; i++) {
2898       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
2899       nnz = nnz / bs;
2900       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2901       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
2902       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
2903     }
2904     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2905     PetscCall(PetscFree(bindx));
2906 
2907     PetscCall(MatCreate(comm, outmat));
2908     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
2909     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
2910     PetscCall(MatSetType(*outmat, MATSBAIJ));
2911     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
2912     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2913     MatPreallocateEnd(dnz, onz);
2914   }
2915 
2916   /* numeric phase */
2917   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2918   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
2919 
2920   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2921   for (i = 0; i < m; i++) {
2922     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2923     Ii = i + rstart;
2924     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2925     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2926   }
2927   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2928   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
2929   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
2930   PetscFunctionReturn(PETSC_SUCCESS);
2931 }
2932