xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision a4e35b1925eceef64945ea472b84f2bf06a67b5e)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscblaslapack.h>
5 
6 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
7 {
8   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
9 
10   PetscFunctionBegin;
11   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
12   PetscCall(MatStashDestroy_Private(&mat->stash));
13   PetscCall(MatStashDestroy_Private(&mat->bstash));
14   PetscCall(MatDestroy(&baij->A));
15   PetscCall(MatDestroy(&baij->B));
16 #if defined(PETSC_USE_CTABLE)
17   PetscCall(PetscHMapIDestroy(&baij->colmap));
18 #else
19   PetscCall(PetscFree(baij->colmap));
20 #endif
21   PetscCall(PetscFree(baij->garray));
22   PetscCall(VecDestroy(&baij->lvec));
23   PetscCall(VecScatterDestroy(&baij->Mvctx));
24   PetscCall(VecDestroy(&baij->slvec0));
25   PetscCall(VecDestroy(&baij->slvec0b));
26   PetscCall(VecDestroy(&baij->slvec1));
27   PetscCall(VecDestroy(&baij->slvec1a));
28   PetscCall(VecDestroy(&baij->slvec1b));
29   PetscCall(VecScatterDestroy(&baij->sMvctx));
30   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
31   PetscCall(PetscFree(baij->barray));
32   PetscCall(PetscFree(baij->hd));
33   PetscCall(VecDestroy(&baij->diag));
34   PetscCall(VecDestroy(&baij->bb1));
35   PetscCall(VecDestroy(&baij->xx1));
36 #if defined(PETSC_USE_REAL_MAT_SINGLE)
37   PetscCall(PetscFree(baij->setvaluescopy));
38 #endif
39   PetscCall(PetscFree(baij->in_loc));
40   PetscCall(PetscFree(baij->v_loc));
41   PetscCall(PetscFree(baij->rangebs));
42   PetscCall(PetscFree(mat->data));
43 
44   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
45   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
47   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
48   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
49 #if defined(PETSC_HAVE_ELEMENTAL)
50   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
51 #endif
52 #if defined(PETSC_HAVE_SCALAPACK)
53   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
54 #endif
55   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
57   PetscFunctionReturn(PETSC_SUCCESS);
58 }
59 
60 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
61 #define TYPE SBAIJ
62 #define TYPE_SBAIJ
63 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
64 #undef TYPE
65 #undef TYPE_SBAIJ
66 
67 #if defined(PETSC_HAVE_ELEMENTAL)
68 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
69 #endif
70 #if defined(PETSC_HAVE_SCALAPACK)
71 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
72 #endif
73 
74 /* This could be moved to matimpl.h */
75 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
76 {
77   Mat       preallocator;
78   PetscInt  r, rstart, rend;
79   PetscInt  bs, i, m, n, M, N;
80   PetscBool cong = PETSC_TRUE;
81 
82   PetscFunctionBegin;
83   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
84   PetscValidLogicalCollectiveInt(B, nm, 2);
85   for (i = 0; i < nm; i++) {
86     PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
87     PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
88     PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
89   }
90   PetscValidLogicalCollectiveBool(B, fill, 5);
91   PetscCall(MatGetBlockSize(B, &bs));
92   PetscCall(MatGetSize(B, &M, &N));
93   PetscCall(MatGetLocalSize(B, &m, &n));
94   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
95   PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
96   PetscCall(MatSetBlockSize(preallocator, bs));
97   PetscCall(MatSetSizes(preallocator, m, n, M, N));
98   PetscCall(MatSetUp(preallocator));
99   PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
100   for (r = rstart; r < rend; ++r) {
101     PetscInt           ncols;
102     const PetscInt    *row;
103     const PetscScalar *vals;
104 
105     for (i = 0; i < nm; i++) {
106       PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
107       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
108       if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
109       PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
110     }
111   }
112   PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
113   PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
114   PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
115   PetscCall(MatDestroy(&preallocator));
116   PetscFunctionReturn(PETSC_SUCCESS);
117 }
118 
/* Generic fall-back conversion from MPISBAIJ: expands the symmetric storage
   (only the upper triangle is kept) into a fully stored matrix of type newtype
   by inserting every stored entry and its transpose (conjugate transpose when
   the complex matrix is marked Hermitian). */
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat      B;
  PetscInt r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE, isdense;
    PetscInt  bs;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, newtype));
    PetscCall(MatGetBlockSize(A, &bs));
    PetscCall(MatSetBlockSize(B, bs));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));
    /* dense targets need no preallocation; NOTE(review): MATMPIDENSECUDA is absent from this list -- confirm that is intentional */
    PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
    if (!isdense) {
      /* preallocate with A's pattern mirrored across the diagonal (symm == PETSC_TRUE) */
      PetscCall(MatGetRowUpperTriangular(A));
      PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
      PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      PetscCall(MatSetUp(B));
    }
  } else {
    /* reusing a previously converted matrix: only refresh the values */
    B = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    /* copy the stored (upper-triangular) row, then mirror it into the column */
    PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
    PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian == PETSC_BOOL3_TRUE) {
      /* Hermitian matrix: mirrored entries are the complex conjugates */
      PetscInt i;
      for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
    } else {
      PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    /* in-place conversion: replace A's innards with B's */
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
180 
181 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
182 {
183   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
184 
185   PetscFunctionBegin;
186   PetscCall(MatStoreValues(aij->A));
187   PetscCall(MatStoreValues(aij->B));
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
192 {
193   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
194 
195   PetscFunctionBegin;
196   PetscCall(MatRetrieveValues(aij->A));
197   PetscCall(MatRetrieveValues(aij->B));
198   PetscFunctionReturn(PETSC_SUCCESS);
199 }
200 
/* Insert or add one scalar value at local scalar position (row,col) of the
   diagonal SeqSBAIJ block; orow/ocol are the global indices, used only in the
   error message.  Relies on the caller having in scope: a, aa, ai, aj, aimax,
   ailen, bs, bs2 and the scratch variables rp, ap, bap, brow, bcol, ridx,
   cidx, low, high, t, _i, nrow, rmax, N.  Jumps to a_noinsert once the entry
   has been handled (or dropped when a->nonew == 1). */
#define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; /* block row containing the scalar row */ \
    rp   = aj + ai[brow]; /* block-column indices of this block row */ \
    ap   = aa + bs2 * ai[brow]; /* values of this block row */ \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; /* position inside the bs x bs block */ \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { /* binary search for block column bcol */ \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { /* block already allocated: update in place */ \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; /* silently drop new nonzero locations */ \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)
241 
/* Same as MatSetValues_SeqSBAIJ_A_Private() but for the off-diagonal block
   baij->B (stored as SeqBAIJ); relies on b, ba, bi, bj, bimax, bilen and the
   same scratch variables.  Jumps to b_noinsert once the entry has been
   handled (or dropped when b->nonew == 1). */
#define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; /* block row containing the scalar row */ \
    rp   = bj + bi[brow]; /* block-column indices of this block row */ \
    ap   = ba + bs2 * bi[brow]; /* values of this block row */ \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; /* position inside the bs x bs block */ \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { /* binary search for block column bcol */ \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { /* block already allocated: update in place */ \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; /* silently drop new nonzero locations */ \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)
282 
/* Scalar-indexed MatSetValues() for MPISBAIJ.

   Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored (when MAT_IGNORE_LOWER_TRIANGULAR
   is set) or generates an error.

   Locally owned entries are routed either to the diagonal block baij->A or to
   the off-diagonal block baij->B through the MatSetValues_SeqSBAIJ_{A,B}_Private()
   macros above; rows owned by other processes are filtered (lower-triangular
   blocks removed) and placed in the stash for communication at assembly time. */
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  MatScalar     value;
  PetscBool     roworiented = baij->roworiented;
  PetscInt      i, j, row, col;
  PetscInt      rstart_orig = mat->rmap->rstart;
  PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat           A     = baij->A;
  Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
  PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar    *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  /* scratch arrays used to filter a row before stashing it */
  PetscInt   n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* grow the filtering scratch space if this call supplies more columns than before */
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n, &baij->in_loc));
      PetscCall(PetscMalloc1(n, &baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j = 0; j < n; j++) {
        /* entries whose block row exceeds the block column lie in the lower triangle */
        if (im[i] / bs > in[j] / bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row / bs;
          bcol = col / bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) {
          continue; /* negative columns are silently ignored */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* translate the global block column through the colmap (1-based, 0 == absent) */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
              /* column not yet present in B: fall back to the unassembled state */
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private()
                 since MatDisAssemble replaced baij->B */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else col += in[j] % bs;
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off processor entry */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        /* drop lower-triangular entries before stashing the row */
        n_loc          = 0;
        for (j = 0; j < n; j++) {
          if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i * n + j];
          } else {
            v_loc[n_loc] = v[j * m + i];
          }
          n_loc++;
        }
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
402 
/* Insert or add one bs x bs block of values at block position (row,col) of a
   SeqSBAIJ matrix.  row/col are local block indices; orow/ocol are the global
   block indices, used only in the error message.  Blocks with col < row lie in
   the lower triangle and are ignored (or rejected, depending on
   a->ignore_ltriangular).  The layout in which v is read depends on
   a->roworiented (see the copy loops below). */
static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];       /* block-column indices of block row */
  ap    = aa + bs2 * ai[row]; /* values of block row */
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  /* binary search for block column col, then linear scan of the remainder */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already allocated: merge the new values in place */
      bap = ap + bs2 * i;
      if (roworiented) {
        /* v is row-major, storage is column-major: stride through bap by bs */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        /* v already matches the storage order: straight copy */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ = *value++;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* silently drop new nonzero blocks */
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++; /* the row just grew by one block */
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}
482 
/*
   Insert or add one bs x bs block of values at block position (row,col) of a
   SeqBAIJ matrix (used for the off-diagonal part); unlike the SBAIJ variant
   above, no lower-triangular filtering is done here.
   This routine is exactly duplicated in mpibaij.c
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];       /* block-column indices of block row */
  ap    = aa + bs2 * ai[row]; /* values of block row */
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  /* binary search for block column col, then linear scan of the remainder */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already allocated: merge the new values in place */
      bap = ap + bs2 * i;
      if (roworiented) {
        /* v is row-major, storage is column-major: stride through bap by bs */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        /* v already matches the storage order: copy column by column */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* silently drop new nonzero blocks */
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++; /* the row just grew by one block */
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}
562 
/*
   Block-indexed MatSetValuesBlocked() for MPISBAIJ: im/in are block indices,
   v holds m*n blocks of bs*bs values each.  Locally owned blocks are copied
   (if necessary) into the contiguous scratch buffer baij->barray and handed
   to the inlined Seq{S,}BAIJ insertion routines; off-process rows go into the
   blocked stash.

    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs*bs scratch block used when a copy is needed */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows/columns of one block inside v */
  if (roworiented) {
    stepval = (n - 1) * bs;
  } else {
    stepval = (m - 1) * bs;
  }
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are silently ignored */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart; /* local block row */
      for (j = 0; j < n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart; /* local block column: goes into the diagonal part A */
          PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative block columns are silently ignored */
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* translate the global block column through the colmap (1-based, 0 == absent) */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            /* NOTE(review): baij->A is SeqSBAIJ but ->nonew is read through a
               Mat_SeqBAIJ cast here (the scalar routine above uses Mat_SeqSBAIJ);
               this relies on the field layout matching -- confirm intended */
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              /* column not yet present in B: fall back to the unassembled state */
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* off-process block row: stash it for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
663 
/* Retrieve scalar entries (idxm[i], idxn[j]) into v (row-major, m x n).
   Only rows owned by this process may be queried; columns in the diagonal
   range are read from baij->A, others are looked up in the colmap and read
   from baij->B (0.0 when the location is not stored). */
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart; /* local row index */
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* diagonal block */
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* off-diagonal block: map the global block column through the colmap (1-based, 0 == absent) */
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          /* unmapped column or stale colmap entry: the location is not stored, value is 0 */
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
701 
/* Matrix norms for MPISBAIJ.  Because the matrix is symmetric and only one
   triangle is stored, contributions from stored entries must also be credited
   to their mirrored positions; NORM_1 and NORM_INFINITY coincide here. */
PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscReal     sum[2], *lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: the diagonal block is the whole matrix */
    PetscCall(MatNorm(baij->A, type, norm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* lnorm2[0] = ||A||^2, lnorm2[1] = ||B||^2, reduced across all processes */
      PetscCall(PetscMalloc1(2, &lnorm2));
      PetscCall(MatNorm(baij->A, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2++; /* lnorm2[0] now holds the square of norm(A); advance to the slot for B */
      PetscCall(MatNorm(baij->B, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2--; /* lnorm2[1] now holds the square of norm(B); rewind to the array start */
      PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      /* B entries are counted twice: each also stands for its symmetric counterpart */
      *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
      Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
      PetscReal    *rsum, *rsum2, vabs;
      PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
      PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
      MatScalar    *v;

      /* rsum accumulates the local absolute column sums over all global columns */
      PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
      PetscCall(PetscArrayzero(rsum, mat->cmap->N));
      /* Amat */
      v  = amat->a;
      jj = amat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow); /* global scalar row of this block row */
        nz   = amat->i[brow + 1] - amat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * (rstart + *jj); /* global scalar column of this block */
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              /* non-diagonal block: also credit the mirrored (transposed) entry */
              if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      /* Bmat: every entry is off-diagonal, so both positions are always credited */
      v  = bmat->a;
      jj = bmat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = bmat->i[brow + 1] - bmat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * garray[*jj]; /* garray maps local to global block columns */
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      /* the norm is the largest global column sum */
      *norm = 0.0;
      for (col = 0; col < mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      PetscCall(PetscFree2(rsum, rsum2));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
783 
784 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
785 {
786   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
787   PetscInt      nstash, reallocs;
788 
789   PetscFunctionBegin;
790   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
791 
792   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
793   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
794   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
795   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
796   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
797   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
798   PetscFunctionReturn(PETSC_SUCCESS);
799 }
800 
/*
  Completes assembly: drains the point and block stashes received from other
  ranks into the local blocks, then assembles the diagonal (A) and
  off-diagonal (B) sequential matrices and, on first final assembly, builds
  the communication (scatter) contexts.
*/
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
  PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt     *row, *col;
  PetscBool     other_disassembled;
  PetscMPIInt   n;
  PetscBool     r1, r2, r3;
  MatScalar    *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the scalar stash: each message carries (row, col, value) triples */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* each stashed block holds bs2 = bs*bs scalar values */
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

  /* per-row work arrays from MatGetRow() are invalidated by reassembly */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
895 
896 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
897 #include <petscdraw.h>
/*
  Viewer backend for ASCII, draw and socket viewers. For INFO-style ASCII
  formats it prints per-rank statistics and returns early; otherwise it
  gathers the entire matrix onto rank 0 and views it there sequentially.
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* synchronized printing so each rank's statistics appear in order */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for factor info */
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat           A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals holds the bs consecutive global row indices of block row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          /* insert one column of the bs x bs block at a time */
          PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Bloc->i;
    aj   = Bloc->j;
    a    = Bloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        /* garray maps local block columns of B to global block columns */
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    /* matname is only read under the same name-guard, so it is never used uninitialized */
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1019 
1020 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1021 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
1022 
1023 PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
1024 {
1025   PetscBool iascii, isdraw, issocket, isbinary;
1026 
1027   PetscFunctionBegin;
1028   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1029   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1030   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1031   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1032   if (iascii || isdraw || issocket) {
1033     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
1034   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
1035   PetscFunctionReturn(PETSC_SUCCESS);
1036 }
1037 
/*
  y = A*x for a Hermitian MPISBAIJ matrix: the locally stored upper triangle
  is applied directly while the mirrored lower triangle is applied via the
  Hermitian transpose of the off-diagonal block B.
*/
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part: B^H * x, only valid if B provides the operation */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  /* accumulate remote contributions into slvec1 (ADD_VALUES) */
  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part: yy = B*slvec1b + slvec1a */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1070 
1071 PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1072 {
1073   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1074   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1075   PetscScalar       *from;
1076   const PetscScalar *x;
1077 
1078   PetscFunctionBegin;
1079   /* diagonal part */
1080   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1081   /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */
1082   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1083   PetscCall(VecZeroEntries(a->slvec1b));
1084 
1085   /* subdiagonal part */
1086   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1087 
1088   /* copy x into the vec slvec0 */
1089   PetscCall(VecGetArray(a->slvec0, &from));
1090   PetscCall(VecGetArrayRead(xx, &x));
1091 
1092   PetscCall(PetscArraycpy(from, x, bs * mbs));
1093   PetscCall(VecRestoreArray(a->slvec0, &from));
1094   PetscCall(VecRestoreArrayRead(xx, &x));
1095 
1096   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1097   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1098   /* supperdiagonal part */
1099   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1100   PetscFunctionReturn(PETSC_SUCCESS);
1101 }
1102 
/*
  z = y + A*x for a Hermitian MPISBAIJ matrix; same structure as
  MatMult_MPISBAIJ_Hermitian() but the diagonal-block product adds yy.
*/
PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part: slvec1a = A_diag*x + y */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  /* slvec1b shares storage with slvec1; bump its state before zeroing */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part: B^H * x, only if B provides the operation */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  /* accumulate remote contributions (ADD_VALUES) into slvec1 */
  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part: zz = B*slvec1b + slvec1a */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1134 
1135 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1136 {
1137   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1138   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1139   PetscScalar       *from;
1140   const PetscScalar *x;
1141 
1142   PetscFunctionBegin;
1143   /* diagonal part */
1144   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1145   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1146   PetscCall(VecZeroEntries(a->slvec1b));
1147 
1148   /* subdiagonal part */
1149   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1150 
1151   /* copy x into the vec slvec0 */
1152   PetscCall(VecGetArray(a->slvec0, &from));
1153   PetscCall(VecGetArrayRead(xx, &x));
1154   PetscCall(PetscArraycpy(from, x, bs * mbs));
1155   PetscCall(VecRestoreArray(a->slvec0, &from));
1156 
1157   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1158   PetscCall(VecRestoreArrayRead(xx, &x));
1159   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1160 
1161   /* supperdiagonal part */
1162   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1163   PetscFunctionReturn(PETSC_SUCCESS);
1164 }
1165 
1166 /*
1167   This only works correctly for square matrices where the subblock A->A is the
1168    diagonal block
1169 */
1170 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1171 {
1172   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1173 
1174   PetscFunctionBegin;
1175   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1176   PetscCall(MatGetDiagonal(a->A, v));
1177   PetscFunctionReturn(PETSC_SUCCESS);
1178 }
1179 
1180 PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1181 {
1182   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1183 
1184   PetscFunctionBegin;
1185   PetscCall(MatScale(a->A, aa));
1186   PetscCall(MatScale(a->B, aa));
1187   PetscFunctionReturn(PETSC_SUCCESS);
1188 }
1189 
/*
  Returns one (local) row of the global matrix by merging the corresponding
  rows of the diagonal block A and off-diagonal block B, sorted by global
  column index. Results live in mat->rowvalues/rowindices until
  MatRestoreRow_MPISBAIJ() is called.
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time */
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
    Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt      max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - brstart; /* local row index */

  /* request only the pieces (values / indices) the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries with global column < cstart (they precede A's) */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          /* v was not requested, so imark must still be computed here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1274 
1275 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1276 {
1277   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1278 
1279   PetscFunctionBegin;
1280   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1281   baij->getrowactive = PETSC_FALSE;
1282   PetscFunctionReturn(PETSC_SUCCESS);
1283 }
1284 
1285 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1286 {
1287   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1288   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1289 
1290   PetscFunctionBegin;
1291   aA->getrow_utriangular = PETSC_TRUE;
1292   PetscFunctionReturn(PETSC_SUCCESS);
1293 }
1294 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1295 {
1296   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1297   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1298 
1299   PetscFunctionBegin;
1300   aA->getrow_utriangular = PETSC_FALSE;
1301   PetscFunctionReturn(PETSC_SUCCESS);
1302 }
1303 
1304 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1305 {
1306   PetscFunctionBegin;
1307   if (PetscDefined(USE_COMPLEX)) {
1308     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
1309 
1310     PetscCall(MatConjugate(a->A));
1311     PetscCall(MatConjugate(a->B));
1312   }
1313   PetscFunctionReturn(PETSC_SUCCESS);
1314 }
1315 
1316 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1317 {
1318   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1319 
1320   PetscFunctionBegin;
1321   PetscCall(MatRealPart(a->A));
1322   PetscCall(MatRealPart(a->B));
1323   PetscFunctionReturn(PETSC_SUCCESS);
1324 }
1325 
1326 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1327 {
1328   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1329 
1330   PetscFunctionBegin;
1331   PetscCall(MatImaginaryPart(a->A));
1332   PetscCall(MatImaginaryPart(a->B));
1333   PetscFunctionReturn(PETSC_SUCCESS);
1334 }
1335 
1336 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1337    Input: isrow       - distributed(parallel),
1338           iscol_local - locally owned (seq)
1339 */
1340 PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1341 {
1342   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
1343   const PetscInt *ptr1, *ptr2;
1344 
1345   PetscFunctionBegin;
1346   PetscCall(ISGetLocalSize(isrow, &sz1));
1347   PetscCall(ISGetLocalSize(iscol_local, &sz2));
1348   if (sz1 > sz2) {
1349     *flg = PETSC_FALSE;
1350     PetscFunctionReturn(PETSC_SUCCESS);
1351   }
1352 
1353   PetscCall(ISGetIndices(isrow, &ptr1));
1354   PetscCall(ISGetIndices(iscol_local, &ptr2));
1355 
1356   PetscCall(PetscMalloc1(sz1, &a1));
1357   PetscCall(PetscMalloc1(sz2, &a2));
1358   PetscCall(PetscArraycpy(a1, ptr1, sz1));
1359   PetscCall(PetscArraycpy(a2, ptr2, sz2));
1360   PetscCall(PetscSortInt(sz1, a1));
1361   PetscCall(PetscSortInt(sz2, a2));
1362 
1363   nmatch = 0;
1364   k      = 0;
1365   for (i = 0; i < sz1; i++) {
1366     for (j = k; j < sz2; j++) {
1367       if (a1[i] == a2[j]) {
1368         k = j;
1369         nmatch++;
1370         break;
1371       }
1372     }
1373   }
1374   PetscCall(ISRestoreIndices(isrow, &ptr1));
1375   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
1376   PetscCall(PetscFree(a1));
1377   PetscCall(PetscFree(a2));
1378   if (nmatch < sz1) {
1379     *flg = PETSC_FALSE;
1380   } else {
1381     *flg = PETSC_TRUE;
1382   }
1383   PetscFunctionReturn(PETSC_SUCCESS);
1384 }
1385 
/*
  Extracts a parallel submatrix. For the symmetric format the column index
  set must equal the row index set (gathered across ranks) and be sorted;
  the actual extraction is delegated to the BAIJ implementation.
*/
PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local;
  PetscInt  csize;
  PetscBool isequal;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  if (call == MAT_REUSE_MATRIX) {
    /* reuse the gathered column IS stashed on the submatrix by the initial call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscBool issorted;

    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
    PetscCall(ISSorted(iscol_local, &issorted));
    PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
  }

  /* now call MatCreateSubMatrix_MPIBAIJ() */
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
  if (call == MAT_INITIAL_MATRIX) {
    /* the compose takes a reference, so destroying here leaves the IS alive
       on the submatrix for later MAT_REUSE_MATRIX calls */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1414 
1415 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1416 {
1417   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1418 
1419   PetscFunctionBegin;
1420   PetscCall(MatZeroEntries(l->A));
1421   PetscCall(MatZeroEntries(l->B));
1422   PetscFunctionReturn(PETSC_SUCCESS);
1423 }
1424 
1425 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1426 {
1427   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1428   Mat            A = a->A, B = a->B;
1429   PetscLogDouble isend[5], irecv[5];
1430 
1431   PetscFunctionBegin;
1432   info->block_size = (PetscReal)matin->rmap->bs;
1433 
1434   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1435 
1436   isend[0] = info->nz_used;
1437   isend[1] = info->nz_allocated;
1438   isend[2] = info->nz_unneeded;
1439   isend[3] = info->memory;
1440   isend[4] = info->mallocs;
1441 
1442   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1443 
1444   isend[0] += info->nz_used;
1445   isend[1] += info->nz_allocated;
1446   isend[2] += info->nz_unneeded;
1447   isend[3] += info->memory;
1448   isend[4] += info->mallocs;
1449   if (flag == MAT_LOCAL) {
1450     info->nz_used      = isend[0];
1451     info->nz_allocated = isend[1];
1452     info->nz_unneeded  = isend[2];
1453     info->memory       = isend[3];
1454     info->mallocs      = isend[4];
1455   } else if (flag == MAT_GLOBAL_MAX) {
1456     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1457 
1458     info->nz_used      = irecv[0];
1459     info->nz_allocated = irecv[1];
1460     info->nz_unneeded  = irecv[2];
1461     info->memory       = irecv[3];
1462     info->mallocs      = irecv[4];
1463   } else if (flag == MAT_GLOBAL_SUM) {
1464     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1465 
1466     info->nz_used      = irecv[0];
1467     info->nz_allocated = irecv[1];
1468     info->nz_unneeded  = irecv[2];
1469     info->memory       = irecv[3];
1470     info->mallocs      = irecv[4];
1471   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1472   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1473   info->fill_ratio_needed = 0;
1474   info->factor_mallocs    = 0;
1475   PetscFunctionReturn(PETSC_SUCCESS);
1476 }
1477 
/*
  Applies a matrix option, forwarding to the local blocks where appropriate.
  In complex builds, MAT_HERMITIAN and MAT_SYMMETRIC/MAT_SPD also switch the
  mat-vec function pointers between the Hermitian and symmetric variants.
*/
PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    /* options forwarded verbatim to both local blocks */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* need different mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
      A->ops->multtranspose    = NULL;
      A->ops->multtransposeadd = NULL;
      A->symmetric             = PETSC_BOOL3_FALSE;
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ;
      A->ops->multadd          = MatMultAdd_MPISBAIJ;
      A->ops->multtranspose    = MatMult_MPISBAIJ;
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
    /* an SBAIJ matrix is symmetric by construction; refusing to be so is an error */
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_SPD_ETERNAL:
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    /* NOTE(review): sets the same flag as MAT_IGNORE_LOWER_TRIANGULAR — verify this is intended */
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1563 
1564 PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1565 {
1566   PetscFunctionBegin;
1567   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1568   if (reuse == MAT_INITIAL_MATRIX) {
1569     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1570   } else if (reuse == MAT_REUSE_MATRIX) {
1571     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1572   }
1573   PetscFunctionReturn(PETSC_SUCCESS);
1574 }
1575 
/*
  MatDiagonalScale_MPISBAIJ - Computes mat = diag(ll) * mat * diag(rr).

  Because the matrix is stored in symmetric format, the left and right scaling
  vectors must be identical (otherwise symmetry would be destroyed). The scatter
  of rr into the ghosted vector lvec is overlapped with the local scaling work.
*/
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat           a = baij->A, b = baij->B; /* diagonal and off-diagonal local blocks */
  PetscInt      nv, m, n;
  PetscBool     flg;

  PetscFunctionBegin;
  /* ll and rr may be the same Vec object; if not, they must at least be equal in value */
  if (ll != rr) {
    PetscCall(VecEqual(ll, rr, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
  }
  if (!ll) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);

  PetscCall(VecGetLocalSize(rr, &nv));
  PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");

  /* start communicating the off-process entries of rr needed for column scaling of B */
  PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));

  /* left diagonalscale the off-diagonal part */
  PetscUseTypeMethod(b, diagonalscale, ll, NULL);

  /* scale the diagonal part */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  /* right diagonalscale the off-diagonal part */
  PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  PetscFunctionReturn(PETSC_SUCCESS);
}
1609 
1610 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1611 {
1612   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1613 
1614   PetscFunctionBegin;
1615   PetscCall(MatSetUnfactored(a->A));
1616   PetscFunctionReturn(PETSC_SUCCESS);
1617 }
1618 
1619 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1620 
1621 PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1622 {
1623   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1624   Mat           a, b, c, d;
1625   PetscBool     flg;
1626 
1627   PetscFunctionBegin;
1628   a = matA->A;
1629   b = matA->B;
1630   c = matB->A;
1631   d = matB->B;
1632 
1633   PetscCall(MatEqual(a, c, &flg));
1634   if (flg) PetscCall(MatEqual(b, d, &flg));
1635   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1636   PetscFunctionReturn(PETSC_SUCCESS);
1637 }
1638 
1639 PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1640 {
1641   PetscBool isbaij;
1642 
1643   PetscFunctionBegin;
1644   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1645   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1646   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1647   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1648     PetscCall(MatGetRowUpperTriangular(A));
1649     PetscCall(MatCopy_Basic(A, B, str));
1650     PetscCall(MatRestoreRowUpperTriangular(A));
1651   } else {
1652     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1653     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1654 
1655     PetscCall(MatCopy(a->A, b->A, str));
1656     PetscCall(MatCopy(a->B, b->B, str));
1657   }
1658   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1659   PetscFunctionReturn(PETSC_SUCCESS);
1660 }
1661 
/*
  MatAXPY_MPISBAIJ - Computes Y = a*X + Y.

  Three cases:
  - SAME_NONZERO_PATTERN:   the value arrays line up one-to-one, so a raw BLAS axpy
                            on each local block suffices.
  - SUBSET_NONZERO_PATTERN: falls back to the generic implementation, temporarily
                            enabling upper-triangular row access on X.
  - otherwise:              a new matrix with the merged nonzero pattern is
                            preallocated, filled, and swapped into Y's header.
*/
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
  PetscBLASInt  bnz, one                          = 1;
  Mat_SeqSBAIJ *xa, *ya;
  Mat_SeqBAIJ  *xb, *yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa                = (Mat_SeqSBAIJ *)xx->A->data;
    ya                = (Mat_SeqSBAIJ *)yy->A->data;
    /* diagonal blocks: ya->a += alpha * xa->a over all stored (block) nonzeros */
    PetscCall(PetscBLASIntCast(xa->nz, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
    xb = (Mat_SeqBAIJ *)xx->B->data;
    yb = (Mat_SeqBAIJ *)yy->B->data;
    /* off-diagonal blocks, same treatment */
    PetscCall(PetscBLASIntCast(xb->nz, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
    PetscCall(MatAXPY_Basic(Y, a, X, str));
    PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
  } else {
    /* unknown relationship between patterns: build a new matrix B with the union pattern */
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
    PetscCall(MatGetRowUpperTriangular(X));
    PetscCall(MatGetRowUpperTriangular(Y));
    /* per-block-row nonzero counts for the diagonal (nnz_d) and off-diagonal (nnz_o) parts */
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPISBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* replace Y's innards with B's, destroying B's shell */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
    PetscCall(MatRestoreRowUpperTriangular(X));
    PetscCall(MatRestoreRowUpperTriangular(Y));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1710 
1711 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1712 {
1713   PetscInt  i;
1714   PetscBool flg;
1715 
1716   PetscFunctionBegin;
1717   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1718   for (i = 0; i < n; i++) {
1719     PetscCall(ISEqual(irow[i], icol[i], &flg));
1720     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1721   }
1722   PetscFunctionReturn(PETSC_SUCCESS);
1723 }
1724 
1725 PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1726 {
1727   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1728   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
1729 
1730   PetscFunctionBegin;
1731   if (!Y->preallocated) {
1732     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
1733   } else if (!aij->nz) {
1734     PetscInt nonew = aij->nonew;
1735     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1736     aij->nonew = nonew;
1737   }
1738   PetscCall(MatShift_Basic(Y, a));
1739   PetscFunctionReturn(PETSC_SUCCESS);
1740 }
1741 
1742 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1743 {
1744   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1745 
1746   PetscFunctionBegin;
1747   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
1748   PetscCall(MatMissingDiagonal(a->A, missing, d));
1749   if (d) {
1750     PetscInt rstart;
1751     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1752     *d += rstart / A->rmap->bs;
1753   }
1754   PetscFunctionReturn(PETSC_SUCCESS);
1755 }
1756 
1757 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1758 {
1759   PetscFunctionBegin;
1760   *a = ((Mat_MPISBAIJ *)A->data)->A;
1761   PetscFunctionReturn(PETSC_SUCCESS);
1762 }
1763 
1764 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep)
1765 {
1766   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1767 
1768   PetscFunctionBegin;
1769   PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep));       // possibly keep zero diagonal coefficients
1770   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
1771   PetscFunctionReturn(PETSC_SUCCESS);
1772 }
1773 
/* Operation table for MATMPISBAIJ. Slot indices follow struct _MatOps; NULL means
   the operation is unsupported or inherited from generic fallbacks. Note that the
   transpose mat-vec slots reuse the non-transpose kernels since the matrix is symmetric. */
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                                       /*  4*/ MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                                       /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                                       /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                                       /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPISBAIJ,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPISBAIJ,
                                       MatCreateSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                                       /* 44*/ NULL,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       MatSetUnfactored_MPISBAIJ,
                                       NULL,
                                       MatSetValuesBlocked_MPISBAIJ,
                                       /* 59*/ MatCreateSubMatrix_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       NULL,
                                       MatConvert_MPISBAIJ_Basic,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPISBAIJ,
                                       /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPISBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPISBAIJ,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPISBAIJ};
1926 
1927 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1928 {
1929   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1930   PetscInt      i, mbs, Mbs;
1931   PetscMPIInt   size;
1932 
1933   PetscFunctionBegin;
1934   if (B->hash_active) {
1935     B->ops[0]      = b->cops;
1936     B->hash_active = PETSC_FALSE;
1937   }
1938   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1939   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
1940   PetscCall(PetscLayoutSetUp(B->rmap));
1941   PetscCall(PetscLayoutSetUp(B->cmap));
1942   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1943   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1944   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1945 
1946   mbs = B->rmap->n / bs;
1947   Mbs = B->rmap->N / bs;
1948   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs);
1949 
1950   B->rmap->bs = bs;
1951   b->bs2      = bs * bs;
1952   b->mbs      = mbs;
1953   b->Mbs      = Mbs;
1954   b->nbs      = B->cmap->n / bs;
1955   b->Nbs      = B->cmap->N / bs;
1956 
1957   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1958   b->rstartbs = B->rmap->rstart / bs;
1959   b->rendbs   = B->rmap->rend / bs;
1960 
1961   b->cstartbs = B->cmap->rstart / bs;
1962   b->cendbs   = B->cmap->rend / bs;
1963 
1964 #if defined(PETSC_USE_CTABLE)
1965   PetscCall(PetscHMapIDestroy(&b->colmap));
1966 #else
1967   PetscCall(PetscFree(b->colmap));
1968 #endif
1969   PetscCall(PetscFree(b->garray));
1970   PetscCall(VecDestroy(&b->lvec));
1971   PetscCall(VecScatterDestroy(&b->Mvctx));
1972   PetscCall(VecDestroy(&b->slvec0));
1973   PetscCall(VecDestroy(&b->slvec0b));
1974   PetscCall(VecDestroy(&b->slvec1));
1975   PetscCall(VecDestroy(&b->slvec1a));
1976   PetscCall(VecDestroy(&b->slvec1b));
1977   PetscCall(VecScatterDestroy(&b->sMvctx));
1978 
1979   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1980   PetscCall(MatDestroy(&b->B));
1981   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
1982   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
1983   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1984 
1985   PetscCall(MatDestroy(&b->A));
1986   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
1987   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
1988   PetscCall(MatSetType(b->A, MATSEQSBAIJ));
1989 
1990   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
1991   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
1992 
1993   B->preallocated  = PETSC_TRUE;
1994   B->was_assembled = PETSC_FALSE;
1995   B->assembled     = PETSC_FALSE;
1996   PetscFunctionReturn(PETSC_SUCCESS);
1997 }
1998 
/*
  MatMPISBAIJSetPreallocationCSR_MPISBAIJ - Preallocates and fills an MPISBAIJ matrix
  from local block-CSR arrays (ii = block-row pointers, jj = global block columns,
  V = optional bs*bs blocks of values).

  Two passes: the first splits each block row's columns into strictly-lower (bd,
  ignored by symmetric storage), diagonal-portion (d), and off-diagonal (nz-d-bd)
  counts for preallocation; the second inserts the values with
  MatSetValuesBlocked, honoring the matrix's row/column orientation.
*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cend;
  PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ          = NULL;
  PetscScalar    *values      = NULL;
  PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  /* all sizes below are in block units */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i]; /* number of block columns in this block row */
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
    JJ = jj + ii[i];
    bd = 0;
    /* skip strictly-lower-triangular columns (jj assumed sorted per row) */
    for (j = 0; j < nz; j++) {
      if (*JJ >= i + rstart) break;
      JJ++;
      bd++;
    }
    d = 0;
    /* count columns in [row, cend): these land in the diagonal portion */
    for (; j < nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d - bd; /* remainder is off-diagonal */
    nz       = nz - bd;     /* entries actually inserted (lower triangle dropped) */
    nz_max   = PetscMax(nz_max, nz);
  }
  PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* with no V, insert zeros from a scratch buffer big enough for the widest row */
  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* all input was local, so skip the off-process stash exchange during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2074 
2075 /*MC
2076    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2077    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2078    the matrix is stored.
2079 
2080    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2081    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
2082 
2083    Options Database Key:
2084 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2085 
2086    Level: beginner
2087 
   Note:
     The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
     diagonal portion of the matrix of each process has to be less than or equal to the number of columns.
2091 
2092 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2093 M*/
2094 
/*
  MatCreate_MPISBAIJ - Constructor for the MATMPISBAIJ matrix type: allocates the
  implementation struct, installs the operation table, initializes all cached
  state to empty, and composes the type-specific methods. Preallocation happens
  later via MatMPISBAIJSetPreallocation().
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ *b;
  PetscBool     flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data   = (void *)b;
  B->ops[0] = MatOps_Values;

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* global-to-local column map, built at assembly */
  b->garray      = NULL; /* global indices of off-diagonal columns, built at assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec    = NULL;
  b->Mvctx   = NULL;
  b->slvec0  = NULL;
  b->slvec0b = NULL;
  b->slvec1  = NULL;
  b->slvec1a = NULL;
  b->slvec1b = NULL;
  b->sMvctx  = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = NULL;
  b->v_loc  = NULL;
  b->n_loc  = 0;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));

  /* SBAIJ storage is symmetric by construction; Hermitian only for real scalars */
  B->symmetric                   = PETSC_BOOL3_TRUE;
  B->structurally_symmetric      = PETSC_BOOL3_TRUE;
  B->symmetry_eternal            = PETSC_TRUE;
  B->structural_symmetry_eternal = PETSC_TRUE;
#if defined(PETSC_USE_COMPLEX)
  B->hermitian = PETSC_BOOL3_FALSE;
#else
  B->hermitian = PETSC_BOOL3_TRUE;
#endif

  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39; /* fall back to the default growth factor for invalid input */
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
2195 
2196 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2197 /*MC
2198    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2199 
2200    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2201    and `MATMPISBAIJ` otherwise.
2202 
2203    Options Database Key:
2204 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2205 
2206   Level: beginner
2207 
2208 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2209 M*/
2210 
2211 /*@C
2212   MatMPISBAIJSetPreallocation - For good matrix assembly performance
2213   the user should preallocate the matrix storage by setting the parameters
2214   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2215   performance can be increased by more than a factor of 50.
2216 
2217   Collective
2218 
2219   Input Parameters:
2220 + B     - the matrix
2221 . bs    - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2222           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2223 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2224            submatrix  (same for all local rows)
. d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
           submatrix (possibly different for each block row) or `NULL`.  If you plan to factor the matrix you must leave room
           for the diagonal entry and set a value even if it is zero.
2229 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2230            submatrix (same for all local rows).
2231 - o_nnz - array containing the number of nonzeros in the various block rows of the
2232            off-diagonal portion of the local submatrix that is right of the diagonal
2233            (possibly different for each block row) or `NULL`.
2234 
2235   Options Database Keys:
2236 + -mat_no_unroll  - uses code that does not unroll the loops in the
2237                      block calculations (much slower)
2238 - -mat_block_size - size of the blocks to use
2239 
2240   Level: intermediate
2241 
  Notes:
  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.
2246 
2247   If the *_nnz parameter is given then the *_nz parameter is ignored
2248 
2249   Storage Information:
2250   For a square global matrix we define each processor's diagonal portion
2251   to be its local rows and the corresponding columns (a square submatrix);
2252   each processor's off-diagonal portion encompasses the remainder of the
2253   local matrix (a rectangular submatrix).
2254 
2255   The user can specify preallocated storage for the diagonal part of
2256   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
2257   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2258   memory allocation.  Likewise, specify preallocated storage for the
2259   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2260 
2261   You can call `MatGetInfo()` to get information on how effective the preallocation was;
2262   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2263   You can also run with the option `-info` and look for messages with the string
2264   malloc in them to see if additional memory allocation was needed.
2265 
2266   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2267   the figure below we depict these three local rows and all columns (0-11).
2268 
2269 .vb
2270            0 1 2 3 4 5 6 7 8 9 10 11
2271           --------------------------
2272    row 3  |. . . d d d o o o o  o  o
2273    row 4  |. . . d d d o o o o  o  o
2274    row 5  |. . . d d d o o o o  o  o
2275           --------------------------
2276 .ve
2277 
2278   Thus, any entries in the d locations are stored in the d (diagonal)
2279   submatrix, and any entries in the o locations are stored in the
2280   o (off-diagonal) submatrix.  Note that the d matrix is stored in
2281   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2282 
2283   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2284   plus the diagonal part of the d matrix,
2285   and `o_nz` should indicate the number of block nonzeros per row in the o matrix
2286 
2287   In general, for PDE problems in which most nonzeros are near the diagonal,
2288   one expects `d_nz` >> `o_nz`.
2289 
2290 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2291 @*/
PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* bs must be the same on all ranks sharing B (collective argument) */
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* Dispatch to the type-specific implementation composed on B; a no-op if B is not MPISBAIJ */
  PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2301 
2302 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2303 /*@C
2304   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2305   (block compressed row).  For good matrix assembly performance
2306   the user should preallocate the matrix storage by setting the parameters
2307   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
2308 
2309   Collective
2310 
2311   Input Parameters:
2312 + comm  - MPI communicator
2313 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2314           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2315 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
2316            This value should be the same as the local size used in creating the
2317            y vector for the matrix-vector product y = Ax.
2318 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
2319            This value should be the same as the local size used in creating the
2320            x vector for the matrix-vector product y = Ax.
2321 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2322 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2323 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2324            submatrix (same for all local rows)
. d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local submatrix
           (possibly different for each block row) or `NULL`.
           If you plan to factor the matrix you must leave room for the diagonal entry and
           set its value even if it is zero.
2330 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2331            submatrix (same for all local rows).
2332 - o_nnz - array containing the number of nonzeros in the various block rows of the
2333            off-diagonal portion of the local submatrix (possibly different for
2334            each block row) or `NULL`.
2335 
2336   Output Parameter:
2337 . A - the matrix
2338 
2339   Options Database Keys:
2340 + -mat_no_unroll  - uses code that does not unroll the loops in the
2341                      block calculations (much slower)
2342 . -mat_block_size - size of the blocks to use
2343 - -mat_mpi        - use the parallel matrix data structures even on one processor
2344                (defaults to using SeqBAIJ format on one processor)
2345 
2346   Level: intermediate
2347 
2348   Notes:
2349   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2350   MatXXXXSetPreallocation() paradigm instead of this routine directly.
2351   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2352 
2353   The number of rows and columns must be divisible by blocksize.
2354   This matrix type does not support complex Hermitian operation.
2355 
2356   The user MUST specify either the local or global matrix dimensions
2357   (possibly both).
2358 
  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.
2361 
2362   If the *_nnz parameter is given then the *_nz parameter is ignored
2363 
2364   Storage Information:
2365   For a square global matrix we define each processor's diagonal portion
2366   to be its local rows and the corresponding columns (a square submatrix);
2367   each processor's off-diagonal portion encompasses the remainder of the
2368   local matrix (a rectangular submatrix).
2369 
2370   The user can specify preallocated storage for the diagonal part of
2371   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
2372   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2373   memory allocation. Likewise, specify preallocated storage for the
2374   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2375 
2376   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2377   the figure below we depict these three local rows and all columns (0-11).
2378 
2379 .vb
2380            0 1 2 3 4 5 6 7 8 9 10 11
2381           --------------------------
2382    row 3  |. . . d d d o o o o  o  o
2383    row 4  |. . . d d d o o o o  o  o
2384    row 5  |. . . d d d o o o o  o  o
2385           --------------------------
2386 .ve
2387 
2388   Thus, any entries in the d locations are stored in the d (diagonal)
2389   submatrix, and any entries in the o locations are stored in the
2390   o (off-diagonal) submatrix. Note that the d matrix is stored in
2391   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2392 
2393   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2394   plus the diagonal part of the d matrix,
2395   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2396   In general, for PDE problems in which most nonzeros are near the diagonal,
2397   one expects `d_nz` >> `o_nz`.
2398 
2399 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2400 @*/
PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  /* Pick the sequential or parallel implementation based on the communicator size */
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    /* single rank: there is no off-diagonal part, so only d_nz/d_nnz are used */
    PetscCall(MatSetType(*A, MATSEQSBAIJ));
    PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2418 
/* Duplicate an MPISBAIJ matrix: copies layout, bookkeeping fields, communication
   structures, and (depending on cpvalues) the numerical values of the diagonal (A)
   and off-diagonal (B) blocks. */
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
  PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
  PetscScalar  *array;

  PetscFunctionBegin;
  *newmat = NULL;

  /* Create the duplicate with identical sizes, type, and (referenced) layouts */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* Copy block-size bookkeeping (counts of block rows/columns, local and global) */
  a      = (Mat_MPISBAIJ *)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* MatGetRow() work arrays are rebuilt lazily */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the flag/factor, but the table itself is rebuilt on demand */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
  /* colmap maps global block columns to local columns of B; deep-copy if present */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  /* garray lists the global block columns present in B */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

  PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
  PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));

  /* slvec1a/slvec1b (and slvec0b) are views that share storage with slvec1 (resp.
     slvec0): the first bs*mbs entries are the "a" part, the remainder the "b" part */
  PetscCall(VecGetLocalSize(a->slvec1, &nt));
  PetscCall(VecGetArray(a->slvec1, &array));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
  PetscCall(VecRestoreArray(a->slvec1, &array));
  PetscCall(VecGetArray(a->slvec0, &array));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
  PetscCall(VecRestoreArray(a->slvec0, &array));

  /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
  a->sMvctx = oldmat->sMvctx;

  /* Duplicate the local diagonal (SeqSBAIJ) and off-diagonal (SeqBAIJ) blocks */
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2510 
/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary

/* Load an MPISBAIJ matrix from a viewer; only binary viewers are supported. */
PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
  /* Shares the binary reader with MPIBAIJ (see #define above) */
  PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2524 
/* Compute v[i] = max_j |A(i,j)| for each local row i of a symmetric (SBAIJ) matrix.
   Only the upper triangle is stored, so the entries of the off-diagonal block B
   contribute both to the rows of this rank (handled directly into va[]) and to the
   rows owned by higher-rank processes (accumulated in work[] and sent point-to-point).
   Requesting the column indices (idx) is not supported. */
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
  PetscReal     atmp;
  PetscReal    *work, *svalues, *rvalues;
  PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
  PetscMPIInt   rank, size;
  PetscInt     *rowners_bs, dest, count, source;
  PetscScalar  *va;
  MatScalar    *ba;
  MPI_Status    stat;

  PetscFunctionBegin;
  PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
  /* Start with the row maxima of the diagonal (SeqSBAIJ) block */
  PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
  PetscCall(VecGetArray(v, &va));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a; /* walked linearly over B's stored blocks below */
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  PetscCall(PetscCalloc1(bs * Mbs, &work));

  /* row_max for B */
  if (rank != size - 1) { /* the last rank stores no B entries (all its columns are local) */
    for (i = 0; i < mbs; i++) {
      ncols = bi[1] - bi[0];
      bi++;
      brow = bs * i;
      for (j = 0; j < ncols; j++) {
        bcol = bs * (*bj);
        for (kcol = 0; kcol < bs; kcol++) {
          col = bcol + kcol;           /* local col index */
          col += rowners_bs[rank + 1]; /* global col index */
          /* NOTE(review): this offsets the compact column index of B by the end of this
             rank's ownership range, i.e. it assumes B's columns are exactly the columns
             to the right of the diagonal block, in order -- confirm against garray */
          for (krow = 0; krow < bs; krow++) {
            atmp = PetscAbsScalar(*ba);
            ba++;
            row = brow + krow; /* local row index */
            /* symmetric entry contributes to this rank's row ... */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            /* ... and to the owning rank's row (same global index as the column) */
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    for (dest = rank + 1; dest < size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest + 1] - rowners_bs[dest];
      PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
    }
  }

  /* receive values */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank + 1] - rowners_bs[rank];
    /* one message from each lower rank; order is irrelevant since max is commutative */
    for (source = 0; source < rank; source++) {
      PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
      /* process values */
      for (i = 0; i < count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  PetscCall(VecRestoreArray(v, &va));
  PetscCall(PetscFree(work));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2607 
/* SOR/SSOR relaxation for MPISBAIJ matrices (block size 1 only).
   Because only the upper triangle is stored, the action of the full matrix is
   assembled from the diagonal block A, the off-diagonal block B, and B^T applied
   through the slvec0/slvec1 shared-storage vectors and the sMvctx scatter. */
PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
  PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
  PetscScalar       *x, *ptr, *from;
  Vec                bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    /* delegate directly to the local diagonal block */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no off-process contributions since xx starts at zero */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
      its--;
    }

    PetscCall(VecDuplicate(bb, &bb1));
    while (its--) {
      /* lower triangular part: slvec0b = - B^T*xx */
      PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));

      /* copy xx into slvec0a */
      PetscCall(VecGetArray(mat->slvec0, &ptr));
      PetscCall(VecGetArray(xx, &x));
      PetscCall(PetscArraycpy(ptr, x, bs * mbs));
      PetscCall(VecRestoreArray(mat->slvec0, &ptr));

      /* negate so the scatter below ADDs the "minus x" contributions */
      PetscCall(VecScale(mat->slvec0, -1.0));

      /* copy bb into slvec1a */
      PetscCall(VecGetArray(mat->slvec1, &ptr));
      PetscCall(VecGetArrayRead(bb, &b));
      PetscCall(PetscArraycpy(ptr, b, bs * mbs));
      PetscCall(VecRestoreArray(mat->slvec1, &ptr));

      /* set slvec1b = 0 */
      PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
      PetscCall(VecZeroEntries(mat->slvec1b));

      /* accumulate the remote contributions into slvec1 = [bb - ... | -x(ghost)] */
      PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
      PetscCall(VecRestoreArray(xx, &x));
      PetscCall(VecRestoreArrayRead(bb, &b));
      PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));

      /* upper triangular part: bb1 = bb1 - B*x */
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));

      /* local diagonal sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
    }
    PetscCall(VecDestroy(&bb1));
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
  } else if (flag & SOR_EISENSTAT) {
    Vec                xx1;
    PetscBool          hasop;
    const PetscScalar *diag;
    PetscScalar       *sl, scale = (omega - 2.0) / omega;
    PetscInt           i, n;

    /* xx1/bb1 are cached on the matrix and reused across calls */
    if (!mat->xx1) {
      PetscCall(VecDuplicate(bb, &mat->xx1));
      PetscCall(VecDuplicate(bb, &mat->bb1));
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));

    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
      PetscCall(VecAYPX(mat->slvec1a, scale, bb));
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
      PetscCall(VecAYPX(mat->slvec1a,scale,bb));
      */
      PetscCall(VecGetArray(mat->slvec1a, &sl));
      PetscCall(VecGetArrayRead(mat->diag, &diag));
      PetscCall(VecGetArrayRead(bb, &b));
      PetscCall(VecGetArray(xx, &x));
      PetscCall(VecGetLocalSize(xx, &n));
      if (omega == 1.0) {
        /* scale == -1, so b + scale*diag*x simplifies to b - diag*x */
        for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
        PetscCall(PetscLogFlops(2.0 * n));
      } else {
        for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
        PetscCall(PetscLogFlops(3.0 * n));
      }
      PetscCall(VecRestoreArray(mat->slvec1a, &sl));
      PetscCall(VecRestoreArrayRead(mat->diag, &diag));
      PetscCall(VecRestoreArrayRead(bb, &b));
      PetscCall(VecRestoreArray(xx, &x));
    }

    /* multiply off-diagonal portion of matrix */
    PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
    PetscCall(VecZeroEntries(mat->slvec1b));
    PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
    PetscCall(VecGetArray(mat->slvec0, &from));
    PetscCall(VecGetArray(xx, &x));
    PetscCall(PetscArraycpy(from, x, bs * mbs));
    PetscCall(VecRestoreArray(mat->slvec0, &from));
    PetscCall(VecRestoreArray(xx, &x));
    PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
    PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(PETSC_SUCCESS);
}
2739 
2740 /*@
2741   MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard
2742   CSR format the local rows.
2743 
2744   Collective
2745 
2746   Input Parameters:
2747 + comm - MPI communicator
2748 . bs   - the block size, only a block size of 1 is supported
2749 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
2750 . n    - This value should be the same as the local size used in creating the
2751        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
2752        calculated if `N` is given) For square matrices `n` is almost always `m`.
2753 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2754 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2755 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2756 . j    - column indices
2757 - a    - matrix values
2758 
2759   Output Parameter:
2760 . mat - the matrix
2761 
2762   Level: intermediate
2763 
2764   Notes:
2765   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
2766   thus you CANNOT change the matrix entries by changing the values of `a` after you have
2767   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
2768 
2769   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
2770 
2771 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2772           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2773 @*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* CSR row offsets must be zero-based, and the local row count must be explicit */
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPISBAIJ));
  /* copies i/j/a into the matrix (see the manual page: the arrays are NOT retained) */
  PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2785 
2786 /*@C
2787   MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2788 
2789   Collective
2790 
2791   Input Parameters:
2792 + B  - the matrix
2793 . bs - the block size
2794 . i  - the indices into `j` for the start of each local row (starts with zero)
2795 . j  - the column indices for each local row (starts with zero) these must be sorted for each row
2796 - v  - optional values in the matrix
2797 
2798   Level: advanced
2799 
2800   Notes:
2801   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
2802   and usually the numerical values as well
2803 
2804   Any entries below the diagonal are ignored
2805 
2806 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2807 @*/
2808 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2809 {
2810   PetscFunctionBegin;
2811   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2812   PetscFunctionReturn(PETSC_SUCCESS);
2813 }
2814 
2815 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2816 {
2817   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
2818   PetscInt    *indx;
2819   PetscScalar *values;
2820 
2821   PetscFunctionBegin;
2822   PetscCall(MatGetSize(inmat, &m, &N));
2823   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2824     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2825     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
2826     PetscInt     *bindx, rmax = a->rmax, j;
2827     PetscMPIInt   rank, size;
2828 
2829     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2830     mbs = m / bs;
2831     Nbs = N / cbs;
2832     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2833     nbs = n / cbs;
2834 
2835     PetscCall(PetscMalloc1(rmax, &bindx));
2836     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2837 
2838     PetscCallMPI(MPI_Comm_rank(comm, &rank));
2839     PetscCallMPI(MPI_Comm_rank(comm, &size));
2840     if (rank == size - 1) {
2841       /* Check sum(nbs) = Nbs */
2842       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2843     }
2844 
2845     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2846     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2847     for (i = 0; i < mbs; i++) {
2848       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
2849       nnz = nnz / bs;
2850       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2851       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
2852       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
2853     }
2854     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2855     PetscCall(PetscFree(bindx));
2856 
2857     PetscCall(MatCreate(comm, outmat));
2858     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
2859     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
2860     PetscCall(MatSetType(*outmat, MATSBAIJ));
2861     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
2862     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2863     MatPreallocateEnd(dnz, onz);
2864   }
2865 
2866   /* numeric phase */
2867   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2868   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
2869 
2870   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2871   for (i = 0; i < m; i++) {
2872     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2873     Ii = i + rstart;
2874     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2875     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2876   }
2877   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2878   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
2879   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
2880   PetscFunctionReturn(PETSC_SUCCESS);
2881 }
2882