#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE BAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
#endif

static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
  PetscScalar       *vv;
  Vec                vB, vA;
  const PetscScalar *va, *vb;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayRead(vA, &va));
  if (idx) {
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
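  /* Merge the per-row maxima of the diagonal block (va) and the off-diagonal block (vb).
     Off-diagonal indices are local to B, so idxb[i] is mapped back to a global point
     column via bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); exact ties are broken in
     favor of the smaller global column index */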
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayRead(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Vec          vB, vA;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowSumAbs(a->A, vA));
  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(MatGetRowSumAbs(a->B, vB));
  PetscCall(VecAXPY(vA, 1.0, vB));
  PetscCall(VecDestroy(&vB));
  PetscCall(VecCopy(vA, v));
  PetscCall(VecDestroy(&vA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global number of block columns.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
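
/*
   A sketch (comment only, not compiled) of how the map built above is consulted when a
   global point column gcol (a stand-in for in[j] in MatSetValues_MPIBAIJ() below) must be
   translated to a local point column of baij->B; values are stored shifted by one so that
   a lookup result of 0 means the block column is not present:

     PetscCall(PetscHMapIGetWithDefault(baij->colmap, gcol / bs + 1, 0, &col));
     col--;                            now -1 if absent, else bs * (local block column)
     if (col >= 0) col += gcol % bs;   local point column within baij->B
*/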

#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
    ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)
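
/*
   Both set-value macros here locate the target block column with the same hybrid search:
   bisection narrows [low,high) while it spans more than 3 entries, then a short linear
   scan finishes, which is cheap for the short block rows typical of BAIJ matrices. If the
   block is absent and new nonzeros are allowed, MatSeqXAIJReallocateAIJ() grows the row
   and the later entries are shifted up to make room for the new block.
*/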

#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
    ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)

PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some variables required by the macros */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)A->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)B->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs;
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This routine could be optimized to avoid the block copy at ** Here a copy is required ** below
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

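  /* With row-oriented input, v is an (m*bs) x (n*bs) dense array stored row by row, so
     consecutive rows of a single bs x bs block are separated by stepval = (n-1)*bs extra
     entries (analogously (m-1)*bs for column-oriented input); the copy below gathers such
     a strided block into the contiguous scratch array barray of bs2 entries expected by
     MatSetValuesBlocked_SeqBAIJ_Inlined() */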
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If there is only one block column (row-oriented) or one block row (column-oriented) then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = col < 1 ? -1 : (col - 1) / bs;
#else
            col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define HASH_KEY             0.6180339887
#define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
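/*
   HASH() is multiplicative hashing with the golden-ratio constant: the key is scaled by
   HASH_KEY, the integer part is discarded, and the fractional part is scaled to a slot
   in [0, size). A worked example (a sketch; values truncated):

     size = 11, key = 7:  tmp  = 7 * 0.6180339887 = 4.3262...
                          slot = (PetscInt)(11 * 0.3262...) = 3
*/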
static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD       = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the hash table */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp;
      PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;

      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp[j] > *nrm) *nrm = tmp[j];
      }
      PetscCall(PetscFree(tmp));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Creates the hash table, and sets the table.
  This table is created only once. If new entries
  need to be added to the matrix then the hash table
  has to be destroyed and recreated.
*/
static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);

  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]];
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average search = %5.2g, max search = %" PetscInt_FMT "\n", (!j) ? 0.0 : (double)(((PetscReal)(ct + j)) / j), max));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
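
/*
   Collisions in the table built above are resolved by linear probing: starting from
   h1 = HASH(ht_size, key, tmp), the slots (h1 + k) % ht_size are scanned until an empty
   one is found. The lookups in MatSetValues_MPIBAIJ_HT() and
   MatSetValuesBlocked_MPIBAIJ_HT() probe the same sequence, which is why the table must
   be destroyed and rebuilt whenever the nonzero pattern of the matrix changes.
*/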

static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily clear the row-oriented flags, then restore the original flags
       after the MatSetValuesBlocked() calls */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented                           = PETSC_FALSE;
    a->roworiented                              = PETSC_FALSE;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented                           = r1;
    a->roworiented                              = r2;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled; if so, we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto the first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscCount      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz);

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
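
/*
   The resulting binary file follows the standard PETSc matrix layout: a 4-entry header
   (MAT_FILE_CLASSID, M, N, global number of scalar nonzeros), the per-point-row lengths,
   the global column indices of all nonzeros in increasing order within each row, and
   finally the nonzero values, with every bs x bs block expanded into point entries.
*/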

PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

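/*
   The parallel matrix-vector products below all follow the same pattern: with A holding
   the diagonal block and B the off-diagonal block, y = A * x_local + B * x_ghost. The
   scatter of the ghost values into a->lvec is started before the purely local multiply
   and completed only afterwards, overlapping communication with computation.
*/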
1222 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1223 {
1224   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1225   PetscInt     nt;
1226 
1227   PetscFunctionBegin;
1228   PetscCall(VecGetLocalSize(xx, &nt));
1229   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1230   PetscCall(VecGetLocalSize(yy, &nt));
1231   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A, xx, yy));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
{
  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart;

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
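      /* imark counts the leading B (off-diagonal) entries whose global block column lies before the diagonal block; they come first in the merged row */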
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal block is square it inherits some of the properties above */
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  PetscCall(PetscMalloc1(bs, &rvals));
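  /* rvals holds the bs global row indices of the current block row of A; column k of each bs x bs block is inserted as one row of the transpose */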

  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory each time, triggering many mallocs
     and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  PetscCall(PetscMPIIntCast(A->rmap->n, &n));
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
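  /* lmask now has a nonzero entry for each ghost column whose global row was zeroed on some process */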
  if (x) {
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
    aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
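    /* blocks are stored column-major, so the entries of local row row % bs sit bs apart within each bs2-sized block */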
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = baij->a + j * bs2 + (i % bs) + bs * k;
          if (x) bb[i] -= aa[0] * xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);

static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
  Mat          a, b, c, d;
  PetscBool    flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
  Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
  Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one                         = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
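    /* with identical nonzero patterns Y += a*X reduces to one flat BLAS axpy over the nz*bs2 stored values of each of the two parts */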
    PetscScalar alpha = a;
    x                 = (Mat_SeqBAIJ *)xx->A->data;
    y                 = (Mat_SeqBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrices is much slower than for AIJ, even for bs=1! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;

    PetscCall(MatConjugate_SeqBAIJ(a->A));
    PetscCall(MatConjugate_SeqBAIJ(a->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS       iscol_local;
  PetscInt csize;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscCall(ISAllGather(iscol, &iscol_local));
  }
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Not great since it makes two copies of the submatrix: first a SeqBAIJ
  on each process, then the end result by concatenating the local matrices.
  Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
  This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. It does not point
     out errors and might change the indices, hence it is buggy */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  if (isrow == iscol) {
    iscol_new = isrow_new;
    PetscCall(PetscObjectReference((PetscObject)iscol_new));
  } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  m = m / bs;
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)Mreuse->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)Mreuse->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  MPI_Comm        comm, pcomm;
  PetscInt        clocal_size, nrows;
  const PetscInt *rows;
  PetscMPIInt     size;
  IS              crowp, lcolp;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* make a collective version of 'rowp' */
  PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
  if (pcomm == comm) {
    crowp = rowp;
  } else {
    PetscCall(ISGetSize(rowp, &nrows));
    PetscCall(ISGetIndices(rowp, &rows));
    PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
    PetscCall(ISRestoreIndices(rowp, &rows));
  }
  PetscCall(ISSetPermutation(crowp));
  /* make a local version of 'colp' */
  PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
  PetscCallMPI(MPI_Comm_size(pcomm, &size));
  if (size == 1) {
    lcolp = colp;
  } else {
    PetscCall(ISAllGather(colp, &lcolp));
  }
  PetscCall(ISSetPermutation(lcolp));
  /* now we just get the submatrix */
  PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
  /* clean up */
  if (pcomm != comm) PetscCall(ISDestroy(&crowp));
  if (size > 1) PetscCall(ISDestroy(&lcolp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) *nghosts = B->nbs;
  if (ghosts) *ghosts = baij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /*   Tell every processor the number of nonzeros per row  */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size;
  for (i = 0; i < size; i++) {
    PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i]));
    PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i]));
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
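  /* after the in-place allgather every process knows the (block) row lengths of the entire matrix */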
  /* Create the sequential matrix of the same type as the local block diagonal  */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /*     Copy my part of matrix column indices over  */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank] / bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /*  Gather all column indices to all processors  */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /*  Assemble the matrix into usable form (note numerical values not yet set)  */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));
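  /* bb1 receives the locally corrected right-hand side bb - B*x(ghost) before each local sweep */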

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
  PetscInt     m, N, i, *garray = aij->garray;
  PetscInt     ib, jb, bs = A->rmap->bs;
  Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
  MatScalar   *a_val = a_aij->a;
  Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
  MatScalar   *b_val = b_aij->a;
  PetscReal   *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &N));
  PetscCall(PetscCalloc1(N, &work));
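  /* work accumulates this process's contribution to each of the N global columns; the contributions are combined below with an allreduce */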
  if (type == NORM_2) {
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_1) {
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_INFINITY) {
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
          work[col]    = PetscMax(PetscAbsScalar(*a_val), work[col]);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          PetscInt col = garray[b_aij->j[i]] * bs + jb;
          work[col]    = PetscMax(PetscAbsScalar(*b_val), work[col]);
          b_val++;
        }
      }
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
          b_val++;
        }
      }
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
          b_val++;
        }
      }
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < N; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype             = a->A->factorerrortype;
  A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
  A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
  Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart / A->rmap->bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPIBAIJ *)A->data)->A;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIBAIJ,
                                       MatGetRowSumAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*155*/ NULL,
                                       MatCopyHashToXAIJ_MPI_Hash};

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);

static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ          = NULL;
  PetscScalar    *values      = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;
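  /* m, rstart, cstart, and cend are measured in blocks, not in scalar rows and columns */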
2598 
2599   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2600   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
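  /* first pass: count, for each local block row, how many blocks fall inside the diagonal block-column range [cstart, cend) and how many fall outside it */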
2601   for (i = 0; i < m; i++) {
2602     nz = ii[i + 1] - ii[i];
2603     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2604     nz_max = PetscMax(nz_max, nz);
2605     dlen   = 0;
2606     olen   = 0;
2607     JJ     = jj + ii[i];
2608     for (j = 0; j < nz; j++) {
2609       if (*JJ < cstart || *JJ >= cend) olen++;
2610       else dlen++;
2611       JJ++;
2612     }
2613     d_nnz[i] = dlen;
2614     o_nnz[i] = olen;
2615   }
2616   PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2617   PetscCall(PetscFree2(d_nnz, o_nnz));
2618 
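  /* second pass: insert the (optional) numerical values, one block row or one block at a time */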
2619   values = (PetscScalar *)V;
2620   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2621   for (i = 0; i < m; i++) {
2622     PetscInt        row   = i + rstart;
2623     PetscInt        ncols = ii[i + 1] - ii[i];
2624     const PetscInt *icols = jj + ii[i];
2625     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2626       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2627       PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2628     } else { /* block ordering does not match so we can only insert one block at a time. */
2629       PetscInt j;
2630       for (j = 0; j < ncols; j++) {
2631         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2632         PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2633       }
2634     }
2635   }
2636 
2637   if (!V) PetscCall(PetscFree(values));
2638   nooffprocentries    = B->nooffprocentries;
2639   B->nooffprocentries = PETSC_TRUE;
2640   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2641   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2642   B->nooffprocentries = nooffprocentries;
2643 
2644   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2645   PetscFunctionReturn(PETSC_SUCCESS);
2646 }
2647 
2648 /*@C
2649   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2650 
2651   Collective
2652 
2653   Input Parameters:
2654 + B  - the matrix
2655 . bs - the block size
2656 . i  - the indices into `j` for the start of each local row (starts with zero)
2657 . j  - the column indices for each local row (starts with zero); these must be sorted for each row
2658 - v  - optional values in the matrix, use `NULL` if not provided
2659 
2660   Level: advanced
2661 
2662   Notes:
2663   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2664   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2665   called this routine.
2666 
2667   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2668   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2669   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2670   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2671   block column and the second index is over columns within a block.
2672 
2673   Though this routine has Preallocation() in its name, it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well.
2674 
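  Example Usage:
  A minimal sketch (sizes and values illustrative only), assuming `B` is a `MATMPIBAIJ` matrix whose type and local sizes are already set, using the default row-oriented ordering. One process owns a single block row of block size 2 with blocks in global block columns 0 and 2:
.vb
  PetscInt    i[2] = {0, 2};          // one local block row containing two blocks
  PetscInt    j[2] = {0, 2};          // sorted global block column indices
  PetscScalar v[8] = {1, 2, 3, 4,     // 2x2 block for block column 0
                      5, 6, 7, 8};    // 2x2 block for block column 2
  PetscCall(MatMPIBAIJSetPreallocationCSR(B, 2, i, j, v));
.ve
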
2675 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2676 @*/
2677 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2678 {
2679   PetscFunctionBegin;
2680   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2681   PetscValidType(B, 1);
2682   PetscValidLogicalCollectiveInt(B, bs, 2);
2683   PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2684   PetscFunctionReturn(PETSC_SUCCESS);
2685 }
2686 
2687 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2688 {
2689   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2690   PetscInt     i;
2691   PetscMPIInt  size;
2692 
2693   PetscFunctionBegin;
2694   if (B->hash_active) {
2695     B->ops[0]      = b->cops;
2696     B->hash_active = PETSC_FALSE;
2697   }
2698   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2699   PetscCall(MatSetBlockSize(B, bs));
2700   PetscCall(PetscLayoutSetUp(B->rmap));
2701   PetscCall(PetscLayoutSetUp(B->cmap));
2702   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2703 
2704   if (d_nnz) {
2705     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2706   }
2707   if (o_nnz) {
2708     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2709   }
2710 
2711   b->bs2 = bs * bs;
2712   b->mbs = B->rmap->n / bs;
2713   b->nbs = B->cmap->n / bs;
2714   b->Mbs = B->rmap->N / bs;
2715   b->Nbs = B->cmap->N / bs;
2716 
2717   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2718   b->rstartbs = B->rmap->rstart / bs;
2719   b->rendbs   = B->rmap->rend / bs;
2720   b->cstartbs = B->cmap->rstart / bs;
2721   b->cendbs   = B->cmap->rend / bs;
2722 
2723 #if defined(PETSC_USE_CTABLE)
2724   PetscCall(PetscHMapIDestroy(&b->colmap));
2725 #else
2726   PetscCall(PetscFree(b->colmap));
2727 #endif
2728   PetscCall(PetscFree(b->garray));
2729   PetscCall(VecDestroy(&b->lvec));
2730   PetscCall(VecScatterDestroy(&b->Mvctx));
2731 
2732   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2733 
2734   MatSeqXAIJGetOptions_Private(b->B);
2735   PetscCall(MatDestroy(&b->B));
2736   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2737   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2738   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2739   MatSeqXAIJRestoreOptions_Private(b->B);
2740 
2741   MatSeqXAIJGetOptions_Private(b->A);
2742   PetscCall(MatDestroy(&b->A));
2743   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2744   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2745   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2746   MatSeqXAIJRestoreOptions_Private(b->A);
2747 
2748   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2749   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2750   B->preallocated  = PETSC_TRUE;
2751   B->was_assembled = PETSC_FALSE;
2752   B->assembled     = PETSC_FALSE;
2753   PetscFunctionReturn(PETSC_SUCCESS);
2754 }
2755 
2756 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2757 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2758 
2759 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
2760 {
2761   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
2762   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
2763   PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
2764   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2765 
2766   PetscFunctionBegin;
2767   PetscCall(PetscMalloc1(M + 1, &ii));
2768   ii[0] = 0;
2769   for (i = 0; i < M; i++) {
2770     PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
2771     PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
2772     ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
2773     /* remove one from the count if this row of the matrix has a diagonal entry */
2774     for (j = id[i]; j < id[i + 1]; j++) {
2775       if (jd[j] == i) {
2776         ii[i + 1]--;
2777         break;
2778       }
2779     }
2780   }
2781   PetscCall(PetscMalloc1(ii[M], &jj));
2782   cnt = 0;
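  /* merge each block row in global column order: off-process block columns that precede the owned range, then the owned (diagonal) block columns minus the diagonal itself, then the remaining off-process block columns */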
2783   for (i = 0; i < M; i++) {
2784     for (j = io[i]; j < io[i + 1]; j++) {
2785       if (garray[jo[j]] > rstart) break;
2786       jj[cnt++] = garray[jo[j]];
2787     }
2788     for (k = id[i]; k < id[i + 1]; k++) {
2789       if (jd[k] != i) jj[cnt++] = rstart + jd[k];
2790     }
2791     for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
2792   }
2793   PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
2794   PetscFunctionReturn(PETSC_SUCCESS);
2795 }
2796 
2797 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2798 
2799 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2800 
2801 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
2802 {
2803   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2804   Mat_MPIAIJ  *b;
2805   Mat          B;
2806 
2807   PetscFunctionBegin;
2808   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");
2809 
2810   if (reuse == MAT_REUSE_MATRIX) {
2811     B = *newmat;
2812   } else {
2813     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
2814     PetscCall(MatSetType(B, MATMPIAIJ));
2815     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2816     PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
2817     PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
2818     PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
2819   }
2820   b = (Mat_MPIAIJ *)B->data;
2821 
2822   if (reuse == MAT_REUSE_MATRIX) {
2823     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
2824     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
2825   } else {
2826     PetscInt   *garray = a->garray;
2827     Mat_SeqAIJ *bB;
2828     PetscInt    bs, nnz;
2829     PetscCall(MatDestroy(&b->A));
2830     PetscCall(MatDestroy(&b->B));
2831     /* just clear out the data structure */
2832     PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_FALSE));
2833     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
2834     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
2835 
2836     /* Global numbering for b->B columns */
2837     bB  = (Mat_SeqAIJ *)b->B->data;
2838     bs  = A->rmap->bs;
2839     nnz = bB->i[A->rmap->n];
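    /* bB->j[] currently holds local block-column indices packed as (block, offset within block); garray[] maps each local block column of b->B back to its global block column */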
2840     for (PetscInt k = 0; k < nnz; k++) {
2841       PetscInt bj = bB->j[k] / bs;
2842       PetscInt br = bB->j[k] % bs;
2843       bB->j[k]    = garray[bj] * bs + br;
2844     }
2845   }
2846   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2847   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2848 
2849   if (reuse == MAT_INPLACE_MATRIX) {
2850     PetscCall(MatHeaderReplace(A, &B));
2851   } else {
2852     *newmat = B;
2853   }
2854   PetscFunctionReturn(PETSC_SUCCESS);
2855 }
2856 
2857 /*MC
2858    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2859 
2860    Options Database Keys:
2861 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2862 . -mat_block_size <bs> - set the blocksize used to store the matrix
2863 . -mat_baij_mult_version <version> - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
2864 - -mat_use_hash_table <fact> - set hash table factor
2865 
2866    Level: beginner
2867 
2868    Note:
2869     `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
2870     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.
2871 
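   Example Usage:
   A minimal sketch (names and sizes are placeholders) that selects this type at run time:
.vb
   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
   PetscCall(MatSetFromOptions(A)); // e.g. run with -mat_type mpibaij -mat_block_size 3
.ve
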
2872 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()`
2873 M*/
2874 
2875 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2876 
2877 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2878 {
2879   Mat_MPIBAIJ *b;
2880   PetscBool    flg = PETSC_FALSE;
2881 
2882   PetscFunctionBegin;
2883   PetscCall(PetscNew(&b));
2884   B->data      = (void *)b;
2885   B->ops[0]    = MatOps_Values;
2886   B->assembled = PETSC_FALSE;
2887 
2888   B->insertmode = NOT_SET_VALUES;
2889   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2890   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2891 
2892   /* build local table of row and column ownerships */
2893   PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2894 
2895   /* build cache for off array entries formed */
2896   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2897 
2898   b->donotstash  = PETSC_FALSE;
2899   b->colmap      = NULL;
2900   b->garray      = NULL;
2901   b->roworiented = PETSC_TRUE;
2902 
2903   /* stuff used in block assembly */
2904   b->barray = NULL;
2905 
2906   /* stuff used for matrix vector multiply */
2907   b->lvec  = NULL;
2908   b->Mvctx = NULL;
2909 
2910   /* stuff for MatGetRow() */
2911   b->rowindices   = NULL;
2912   b->rowvalues    = NULL;
2913   b->getrowactive = PETSC_FALSE;
2914 
2915   /* hash table stuff */
2916   b->ht           = NULL;
2917   b->hd           = NULL;
2918   b->ht_size      = 0;
2919   b->ht_flag      = PETSC_FALSE;
2920   b->ht_fact      = 0;
2921   b->ht_total_ct  = 0;
2922   b->ht_insert_ct = 0;
2923 
2924   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2925   b->ijonly = PETSC_FALSE;
2926 
2927   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2928   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2929   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2930 #if defined(PETSC_HAVE_HYPRE)
2931   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2932 #endif
2933   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2934   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2935   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2936   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2937   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2938   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2939   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2940   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
2941 
2942   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix", "Mat");
2943   PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2944   if (flg) {
2945     PetscReal fact = 1.39;
2946     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2947     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2948     if (fact <= 1.0) fact = 1.39;
2949     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2950     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2951   }
2952   PetscOptionsEnd();
2953   PetscFunctionReturn(PETSC_SUCCESS);
2954 }
2955 
2956 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2957 /*MC
2958    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2959 
2960    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2961    and `MATMPIBAIJ` otherwise.
2962 
2963    Options Database Keys:
2964 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2965 
2966   Level: beginner
2967 
2968 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2969 M*/
2970 
2971 /*@
2972   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2973   (block compressed row).
2974 
2975   Collective
2976 
2977   Input Parameters:
2978 + B     - the matrix
2979 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2980           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2981 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2982            submatrix  (same for all local rows)
2983 . d_nnz - array containing the number of block nonzeros in the various block rows
2984            in the diagonal portion of the local submatrix (possibly different for each block
2985            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2986            set it even if it is zero.
2987 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2988            submatrix (same for all local rows).
2989 - o_nnz - array containing the number of nonzeros in the various block rows of the
2990            off-diagonal portion of the local submatrix (possibly different for
2991            each block row) or `NULL`.
2992 
2993   If the *_nnz parameter is given then the *_nz parameter is ignored
2994 
2995   Options Database Keys:
2996 + -mat_block_size            - size of the blocks to use
2997 - -mat_use_hash_table <fact> - set hash table factor
2998 
2999   Level: intermediate
3000 
3001   Notes:
3002   For good matrix assembly performance
3003   the user should preallocate the matrix storage by setting the parameters
3004   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3005   performance can be increased by more than a factor of 50.
3006 
3007   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
3008   then it must be used on all processors that share the object for that argument.
3009 
3010   Storage Information:
3011   For a square global matrix we define each processor's diagonal portion
3012   to be its local rows and the corresponding columns (a square submatrix);
3013   each processor's off-diagonal portion encompasses the remainder of the
3014   local matrix (a rectangular submatrix).
3015 
3016   The user can specify preallocated storage for the diagonal part of
3017   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
3018   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3019   memory allocation.  Likewise, specify preallocated storage for the
3020   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3021 
3022   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3023   the figure below we depict these three local rows and all columns (0-11).
3024 
3025 .vb
3026            0 1 2 3 4 5 6 7 8 9 10 11
3027           --------------------------
3028    row 3  |o o o d d d o o o o  o  o
3029    row 4  |o o o d d d o o o o  o  o
3030    row 5  |o o o d d d o o o o  o  o
3031           --------------------------
3032 .ve
3033 
3034   Thus, any entries in the d locations are stored in the d (diagonal)
3035   submatrix, and any entries in the o locations are stored in the
3036   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3037   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3038 
3039   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3040   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3041   In general, for PDE problems in which most nonzeros are near the diagonal,
3042   one expects `d_nz` >> `o_nz`.
3043 
3044   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3045   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3046   You can also run with the option `-info` and look for messages with the string
3047   malloc in them to see if additional memory allocation was needed.
3048 
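  Example Usage:
  A sketch matching the figure above (illustrative only), assuming bs = 3 so that the depicted rows 3-5 form a single local block row on `B`:
.vb
  PetscInt d_nnz[1] = {1}; // one diagonal block (columns 3-5)
  PetscInt o_nnz[1] = {3}; // three off-diagonal blocks (columns 0-2, 6-8, and 9-11)
  PetscCall(MatMPIBAIJSetPreallocation(B, 3, 0, d_nnz, 0, o_nnz));
.ve
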
3049 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3050 @*/
3051 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
3052 {
3053   PetscFunctionBegin;
3054   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3055   PetscValidType(B, 1);
3056   PetscValidLogicalCollectiveInt(B, bs, 2);
3057   PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
3058   PetscFunctionReturn(PETSC_SUCCESS);
3059 }
3060 
3061 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3062 /*@
3063   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3064   (block compressed row).
3065 
3066   Collective
3067 
3068   Input Parameters:
3069 + comm  - MPI communicator
3070 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3071           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3072 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3073           This value should be the same as the local size used in creating the
3074           y vector for the matrix-vector product y = Ax.
3075 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3076           This value should be the same as the local size used in creating the
3077           x vector for the matrix-vector product y = Ax.
3078 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3079 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3080 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3081           submatrix  (same for all local rows)
3082 . d_nnz - array containing the number of nonzero blocks in the various block rows
3083           in the diagonal portion of the local submatrix (possibly different for each block
3084           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3085           and set it even if it is zero.
3086 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3087           submatrix (same for all local rows).
3088 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3089           off-diagonal portion of the local submatrix (possibly different for
3090           each block row) or NULL.
3091 
3092   Output Parameter:
3093 . A - the matrix
3094 
3095   Options Database Keys:
3096 + -mat_block_size            - size of the blocks to use
3097 - -mat_use_hash_table <fact> - set hash table factor
3098 
3099   Level: intermediate
3100 
3101   Notes:
3102   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3103   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3104   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3105 
3106   For good matrix assembly performance
3107   the user should preallocate the matrix storage by setting the parameters
3108   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3109   performance can be increased by more than a factor of 50.
3110 
3111   If the *_nnz parameter is given then the *_nz parameter is ignored
3112 
3113   A nonzero block is any block that has 1 or more nonzeros in it.
3114 
3115   The user MUST specify either the local or global matrix dimensions
3116   (possibly both).
3117 
3118   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
3119   then it must be used on all processors that share the object for that argument.
3120 
3121   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
3122   `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
3123 
3124   Storage Information:
3125   For a square global matrix we define each processor's diagonal portion
3126   to be its local rows and the corresponding columns (a square submatrix);
3127   each processor's off-diagonal portion encompasses the remainder of the
3128   local matrix (a rectangular submatrix).
3129 
3130   The user can specify preallocated storage for the diagonal part of
3131   the local submatrix with either d_nz or d_nnz (not both).  Set
3132   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3133   memory allocation.  Likewise, specify preallocated storage for the
3134   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3135 
3136   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3137   the figure below we depict these three local rows and all columns (0-11).
3138 
3139 .vb
3140            0 1 2 3 4 5 6 7 8 9 10 11
3141           --------------------------
3142    row 3  |o o o d d d o o o o  o  o
3143    row 4  |o o o d d d o o o o  o  o
3144    row 5  |o o o d d d o o o o  o  o
3145           --------------------------
3146 .ve
3147 
3148   Thus, any entries in the d locations are stored in the d (diagonal)
3149   submatrix, and any entries in the o locations are stored in the
3150   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3151   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3152 
3153   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3154   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3155   In general, for PDE problems in which most nonzeros are near the diagonal,
3156   one expects `d_nz` >> `o_nz`.
3157 
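  Example Usage:
  A minimal sketch (global size hypothetical) creating a square matrix with 3x3 blocks and constant per-block-row estimates:
.vb
  Mat A;
  PetscCall(MatCreateBAIJ(PETSC_COMM_WORLD, 3, PETSC_DECIDE, PETSC_DECIDE, 300, 300, 2, NULL, 1, NULL, &A));
.ve
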
3158 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`,
3159           `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
3160 @*/
3161 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3162 {
3163   PetscMPIInt size;
3164 
3165   PetscFunctionBegin;
3166   PetscCall(MatCreate(comm, A));
3167   PetscCall(MatSetSizes(*A, m, n, M, N));
3168   PetscCallMPI(MPI_Comm_size(comm, &size));
3169   if (size > 1) {
3170     PetscCall(MatSetType(*A, MATMPIBAIJ));
3171     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3172   } else {
3173     PetscCall(MatSetType(*A, MATSEQBAIJ));
3174     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3175   }
3176   PetscFunctionReturn(PETSC_SUCCESS);
3177 }
3178 
3179 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
3180 {
3181   Mat          mat;
3182   Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
3183   PetscInt     len = 0;
3184 
3185   PetscFunctionBegin;
3186   *newmat = NULL;
3187   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
3188   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
3189   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
3190 
3191   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3192   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3193   if (matin->hash_active) {
3194     PetscCall(MatSetUp(mat));
3195   } else {
3196     mat->factortype   = matin->factortype;
3197     mat->preallocated = PETSC_TRUE;
3198     mat->assembled    = PETSC_TRUE;
3199     mat->insertmode   = NOT_SET_VALUES;
3200 
3201     a             = (Mat_MPIBAIJ *)mat->data;
3202     mat->rmap->bs = matin->rmap->bs;
3203     a->bs2        = oldmat->bs2;
3204     a->mbs        = oldmat->mbs;
3205     a->nbs        = oldmat->nbs;
3206     a->Mbs        = oldmat->Mbs;
3207     a->Nbs        = oldmat->Nbs;
3208 
3209     a->size         = oldmat->size;
3210     a->rank         = oldmat->rank;
3211     a->donotstash   = oldmat->donotstash;
3212     a->roworiented  = oldmat->roworiented;
3213     a->rowindices   = NULL;
3214     a->rowvalues    = NULL;
3215     a->getrowactive = PETSC_FALSE;
3216     a->barray       = NULL;
3217     a->rstartbs     = oldmat->rstartbs;
3218     a->rendbs       = oldmat->rendbs;
3219     a->cstartbs     = oldmat->cstartbs;
3220     a->cendbs       = oldmat->cendbs;
3221 
3222     /* hash table stuff */
3223     a->ht           = NULL;
3224     a->hd           = NULL;
3225     a->ht_size      = 0;
3226     a->ht_flag      = oldmat->ht_flag;
3227     a->ht_fact      = oldmat->ht_fact;
3228     a->ht_total_ct  = 0;
3229     a->ht_insert_ct = 0;
3230 
3231     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
3232     if (oldmat->colmap) {
3233 #if defined(PETSC_USE_CTABLE)
3234       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3235 #else
3236       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
3237       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
3238 #endif
3239     } else a->colmap = NULL;
3240 
3241     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
3242       PetscCall(PetscMalloc1(len, &a->garray));
3243       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3244     } else a->garray = NULL;
3245 
3246     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
3247     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3248     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3249 
3250     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3251     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3252   }
3253   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3254   *newmat = mat;
3255   PetscFunctionReturn(PETSC_SUCCESS);
3256 }
3257 
3258 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3259 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
3260 {
3261   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3262   PetscInt    *rowidxs, *colidxs, rs, cs, ce;
3263   PetscScalar *matvals;
3264 
3265   PetscFunctionBegin;
3266   PetscCall(PetscViewerSetUp(viewer));
3267 
3268   /* read in matrix header */
3269   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3270   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3271   M  = header[1];
3272   N  = header[2];
3273   nz = header[3];
3274   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3275   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3276   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");
3277 
3278   /* set block sizes from the viewer's .info file */
3279   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3280   /* set local sizes if not set already */
3281   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3282   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3283   /* set global sizes if not set already */
3284   if (mat->rmap->N < 0) mat->rmap->N = M;
3285   if (mat->cmap->N < 0) mat->cmap->N = N;
3286   PetscCall(PetscLayoutSetUp(mat->rmap));
3287   PetscCall(PetscLayoutSetUp(mat->cmap));
3288 
3289   /* check if the matrix sizes are correct */
3290   PetscCall(MatGetSize(mat, &rows, &cols));
3291   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3292   PetscCall(MatGetBlockSize(mat, &bs));
3293   PetscCall(MatGetLocalSize(mat, &m, &n));
3294   PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
3295   PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
3296   mbs = m / bs;
3297   nbs = n / bs;
3298 
3299   /* read in row lengths and build row indices */
3300   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3301   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3302   rowidxs[0] = 0;
3303   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3304   PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3305   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3306 
3307   /* read in column indices and matrix values */
3308   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3309   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3310   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3311 
3312   {                /* preallocate matrix storage */
3313     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3314     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3315     PetscBool  sbaij, done;
3316     PetscInt  *d_nnz, *o_nnz;
3317 
3318     PetscCall(PetscBTCreate(nbs, &bt));
3319     PetscCall(PetscHSetICreate(&ht));
3320     PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
3321     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
3322     for (i = 0; i < mbs; i++) {
3323       PetscCall(PetscBTMemzero(nbs, bt));
3324       PetscCall(PetscHSetIClear(ht));
3325       for (k = 0; k < bs; k++) {
3326         PetscInt row = bs * i + k;
3327         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3328           PetscInt col = colidxs[j];
3329           if (!sbaij || col >= row) {
3330             if (col >= cs && col < ce) {
3331               if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
3332             } else {
3333               PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
3334               if (done) o_nnz[i]++;
3335             }
3336           }
3337         }
3338       }
3339     }
3340     PetscCall(PetscBTDestroy(&bt));
3341     PetscCall(PetscHSetIDestroy(&ht));
3342     PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3343     PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3344     PetscCall(PetscFree2(d_nnz, o_nnz));
3345   }
3346 
3347   /* store matrix values */
3348   for (i = 0; i < m; i++) {
3349     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
3350     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3351   }
3352 
3353   PetscCall(PetscFree(rowidxs));
3354   PetscCall(PetscFree2(colidxs, matvals));
3355   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3356   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3357   PetscFunctionReturn(PETSC_SUCCESS);
3358 }
3359 
3360 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3361 {
3362   PetscBool isbinary;
3363 
3364   PetscFunctionBegin;
3365   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3366   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3367   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3368   PetscFunctionReturn(PETSC_SUCCESS);
3369 }
3370 
3371 /*@
3372   MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the matrix's hash table
3373 
3374   Input Parameters:
3375 + mat  - the matrix
3376 - fact - factor
3377 
3378   Options Database Key:
3379 . -mat_use_hash_table <fact> - provide the factor
3380 
3381   Level: advanced
3382 
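  Example Usage:
  A minimal sketch; the factor 1.6 is an arbitrary illustrative value:
.vb
  PetscCall(MatSetOption(mat, MAT_USE_HASH_TABLE, PETSC_TRUE));
  PetscCall(MatMPIBAIJSetHashTableFactor(mat, 1.6));
.ve
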
3383 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3384 @*/
3385 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3386 {
3387   PetscFunctionBegin;
3388   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3389   PetscFunctionReturn(PETSC_SUCCESS);
3390 }
3391 
3392 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3393 {
3394   Mat_MPIBAIJ *baij;
3395 
3396   PetscFunctionBegin;
3397   baij          = (Mat_MPIBAIJ *)mat->data;
3398   baij->ht_fact = fact;
3399   PetscFunctionReturn(PETSC_SUCCESS);
3400 }
3401 
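/*
   MatMPIBAIJGetSeqBAIJ - returns the local diagonal part (Ad), the local off-diagonal part (Ao),
   and the mapping (colmap) from the block columns of Ao to global block columns
*/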
3402 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3403 {
3404   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3405   PetscBool    flg;
3406 
3407   PetscFunctionBegin;
3408   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3409   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3410   if (Ad) *Ad = a->A;
3411   if (Ao) *Ao = a->B;
3412   if (colmap) *colmap = a->garray;
3413   PetscFunctionReturn(PETSC_SUCCESS);
3414 }
3415 
3416 /*
3417     Special version for direct calls from Fortran (to eliminate two function call overheads)
3418 */
3419 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3420   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3421 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3422   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3423 #endif
3424 
3425 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3426 /*@C
3427   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3428 
3429   Collective
3430 
3431   Input Parameters:
3432 + matin  - the matrix
3433 . min    - number of input rows
3434 . im     - input rows
3435 . nin    - number of input columns
3436 . in     - input columns
3437 . v      - numerical values input
3438 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3439 
3440   Level: advanced
3441 
3442   Developer Notes:
3443   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3444 
3445 .seealso: `Mat`, `MatSetValuesBlocked()`
3446 @*/
3447 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
3448 {
3449   /* convert input arguments to C version */
3450   Mat        mat = *matin;
3451   PetscInt   m = *min, n = *nin;
3452   InsertMode addv = *addvin;
3453 
3454   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
3455   const MatScalar *value;
3456   MatScalar       *barray      = baij->barray;
3457   PetscBool        roworiented = baij->roworiented;
3458   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
3459   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
3460   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
3461 
3462   PetscFunctionBegin;
3463   /* tasks normally handled by MatSetValuesBlocked() */
3464   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3465   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
3466   PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3467   if (mat->assembled) {
3468     mat->was_assembled = PETSC_TRUE;
3469     mat->assembled     = PETSC_FALSE;
3470   }
3471   PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));
3472 
3473   if (!barray) {
3474     PetscCall(PetscMalloc1(bs2, &barray));
3475     baij->barray = barray;
3476   }
3477 
3478   if (roworiented) stepval = (n - 1) * bs;
3479   else stepval = (m - 1) * bs;
3480 
3481   for (i = 0; i < m; i++) {
3482     if (im[i] < 0) continue;
3483     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
3484     if (im[i] >= rstart && im[i] < rend) {
3485       row = im[i] - rstart;
3486       for (j = 0; j < n; j++) {
3487         /* if only one block column (row-oriented) or one block row (column-oriented) is input, the block can be used without a copy */
3488         if ((roworiented) && (n == 1)) {
3489           barray = (MatScalar *)v + i * bs2;
3490         } else if ((!roworiented) && (m == 1)) {
3491           barray = (MatScalar *)v + j * bs2;
3492         } else { /* Here a copy is required */
3493           if (roworiented) {
3494             value = v + i * (stepval + bs) * bs + j * bs;
3495           } else {
3496             value = v + j * (stepval + bs) * bs + i * bs;
3497           }
3498           for (ii = 0; ii < bs; ii++, value += stepval) {
3499             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
3500           }
3501           barray -= bs2;
3502         }
3503 
3504         if (in[j] >= cstart && in[j] < cend) {
3505           col = in[j] - cstart;
3506           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
3507         } else if (in[j] < 0) {
3508           continue;
3509         } else {
3510           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
3511           if (mat->was_assembled) {
3512             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3513 
3514 #if defined(PETSC_USE_DEBUG)
3515   #if defined(PETSC_USE_CTABLE)
3516             {
3517               PetscInt data;
3518               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
3519               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3520             }
3521   #else
3522             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3523   #endif
3524 #endif
3525 #if defined(PETSC_USE_CTABLE)
3526             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
3527             col = (col - 1) / bs;
3528 #else
3529             col = (baij->colmap[in[j]] - 1) / bs;
3530 #endif
3531             if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
3532               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3533               col = in[j];
3534             }
3535           } else col = in[j];
3536           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3537         }
3538       }
3539     } else {
3540       if (!baij->donotstash) {
3541         if (roworiented) {
3542           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3543         } else {
3544           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3545         }
3546       }
3547     }
3548   }
3549 
3550   /* task normally handled by MatSetValuesBlocked() */
3551   PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3552   PetscFunctionReturn(PETSC_SUCCESS);
3553 }
3554 
3555 /*@
3556   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain the local rows in standard block CSR format.
3557 
3558   Collective
3559 
3560   Input Parameters:
3561 + comm - MPI communicator
3562 . bs   - the block size, only a block size of 1 is supported
3563 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3564 . n    - This value should be the same as the local size used in creating the
3565          x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
3566          calculated if `N` is given) For square matrices `n` is almost always `m`.
3567 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
3568 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
3569 . i    - row indices; that is, i[0] = 0 and i[row] = i[row-1] + the number of block entries in block row row-1 of the matrix
3570 . j    - column indices
3571 - a    - matrix values
3572 
3573   Output Parameter:
3574 . mat - the matrix
3575 
3576   Level: intermediate
3577 
3578   Notes:
3579   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3580   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3581   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3582 
3583   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3584   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3585   block, followed by the second column of the first block, and so on.  That is, the blocks are contiguous in memory
3586   with column-major ordering within blocks.
3587 
3588   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3589 
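  Example Usage:
  A minimal sketch (entries illustrative), assuming a single MPI process owning both rows of a 2x2 matrix with bs = 1:
.vb
  PetscInt    i[3] = {0, 2, 3};       // row 0 has two entries, row 1 has one
  PetscInt    j[3] = {0, 1, 1};       // global column indices
  PetscScalar a[3] = {1.0, 2.0, 3.0};
  Mat         A;
  PetscCall(MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A));
.ve
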
3590 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3591           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3592 @*/
3593 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3594 {
3595   PetscFunctionBegin;
3596   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3597   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3598   PetscCall(MatCreate(comm, mat));
3599   PetscCall(MatSetSizes(*mat, m, n, M, N));
3600   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3601   PetscCall(MatSetBlockSize(*mat, bs));
3602   PetscCall(MatSetUp(*mat));
3603   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3604   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3605   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3606   PetscFunctionReturn(PETSC_SUCCESS);
3607 }
3608 
3609 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3610 {
3611   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3612   PetscInt    *indx;
3613   PetscScalar *values;
3614 
3615   PetscFunctionBegin;
3616   PetscCall(MatGetSize(inmat, &m, &N));
3617   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3618     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3619     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3620     PetscInt    *bindx, rmax = a->rmax, j;
3621     PetscMPIInt  rank, size;
3622 
3623     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3624     mbs = m / bs;
3625     Nbs = N / cbs;
3626     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3627     nbs = n / cbs;
3628 
3629     PetscCall(PetscMalloc1(rmax, &bindx));
3630     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3631 
3632     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3633     PetscCallMPI(MPI_Comm_size(comm, &size));
3634     if (rank == size - 1) {
3635       /* Check sum(nbs) = Nbs */
3636       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3637     }
3638 
3639     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3640     for (i = 0; i < mbs; i++) {
3641       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3642       nnz = nnz / bs;
3643       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3644       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3645       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3646     }
3647     PetscCall(PetscFree(bindx));
3648 
3649     PetscCall(MatCreate(comm, outmat));
3650     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3651     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3652     PetscCall(MatSetType(*outmat, MATBAIJ));
3653     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3654     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3655     MatPreallocateEnd(dnz, onz);
3656     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3657   }
3658 
3659   /* numeric phase */
3660   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3661   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3662 
3663   for (i = 0; i < m; i++) {
3664     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3665     Ii = i + rstart;
3666     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3667     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3668   }
3669   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3670   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3671   PetscFunctionReturn(PETSC_SUCCESS);
3672 }
3673