xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 7cd49bdee00a26e142bff6e7df7c3fb9209fa795)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
8 {
9   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
10 
11   PetscFunctionBegin;
12   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
13   PetscCall(MatStashDestroy_Private(&mat->stash));
14   PetscCall(MatStashDestroy_Private(&mat->bstash));
15   PetscCall(MatDestroy(&baij->A));
16   PetscCall(MatDestroy(&baij->B));
17 #if defined(PETSC_USE_CTABLE)
18   PetscCall(PetscHMapIDestroy(&baij->colmap));
19 #else
20   PetscCall(PetscFree(baij->colmap));
21 #endif
22   PetscCall(PetscFree(baij->garray));
23   PetscCall(VecDestroy(&baij->lvec));
24   PetscCall(VecScatterDestroy(&baij->Mvctx));
25   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
26   PetscCall(PetscFree(baij->barray));
27   PetscCall(PetscFree2(baij->hd, baij->ht));
28   PetscCall(PetscFree(baij->rangebs));
29   PetscCall(PetscFree(mat->data));
30 
31   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
32   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
33   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
34   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
35   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
41 #if defined(PETSC_HAVE_HYPRE)
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
43 #endif
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
45   PetscFunctionReturn(PETSC_SUCCESS);
46 }
47 
48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and  MatAssemblyEnd_MPI_Hash() */
49 #define TYPE BAIJ
50 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
51 #undef TYPE
52 
53 #if defined(PETSC_HAVE_HYPRE)
54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
55 #endif
56 
57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
58 {
59   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
60   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
61   PetscScalar       *vv;
62   Vec                vB, vA;
63   const PetscScalar *va, *vb;
64 
65   PetscFunctionBegin;
66   PetscCall(MatCreateVecs(a->A, NULL, &vA));
67   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
68 
69   PetscCall(VecGetArrayRead(vA, &va));
70   if (idx) {
71     for (i = 0; i < m; i++) {
72       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
73     }
74   }
75 
76   PetscCall(MatCreateVecs(a->B, NULL, &vB));
77   PetscCall(PetscMalloc1(m, &idxb));
78   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
79 
80   PetscCall(VecGetArrayWrite(v, &vv));
81   PetscCall(VecGetArrayRead(vB, &vb));
82   for (i = 0; i < m; i++) {
83     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
84       vv[i] = vb[i];
85       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
86     } else {
87       vv[i] = va[i];
88       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
89     }
90   }
91   PetscCall(VecRestoreArrayWrite(v, &vv));
92   PetscCall(VecRestoreArrayRead(vA, &va));
93   PetscCall(VecRestoreArrayRead(vB, &vb));
94   PetscCall(PetscFree(idxb));
95   PetscCall(VecDestroy(&vA));
96   PetscCall(VecDestroy(&vB));
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v)
101 {
102   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
103   Vec          vB, vA;
104 
105   PetscFunctionBegin;
106   PetscCall(MatCreateVecs(a->A, NULL, &vA));
107   PetscCall(MatGetRowSumAbs(a->A, vA));
108   PetscCall(MatCreateVecs(a->B, NULL, &vB));
109   PetscCall(MatGetRowSumAbs(a->B, vB));
110   PetscCall(VecAXPY(vA, 1.0, vB));
111   PetscCall(VecDestroy(&vB));
112   PetscCall(VecCopy(vA, v));
113   PetscCall(VecDestroy(&vA));
114   PetscFunctionReturn(PETSC_SUCCESS);
115 }
116 
117 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
118 {
119   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
120 
121   PetscFunctionBegin;
122   PetscCall(MatStoreValues(aij->A));
123   PetscCall(MatStoreValues(aij->B));
124   PetscFunctionReturn(PETSC_SUCCESS);
125 }
126 
127 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
128 {
129   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
130 
131   PetscFunctionBegin;
132   PetscCall(MatRetrieveValues(aij->A));
133   PetscCall(MatRetrieveValues(aij->B));
134   PetscFunctionReturn(PETSC_SUCCESS);
135 }
136 
137 /*
138      Local utility routine that creates a mapping from the global column
139    number to the local number in the off-diagonal part of the local
140    storage of the matrix.  This is done in a non scalable way since the
141    length of colmap equals the global matrix length.
142 */
143 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
144 {
145   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
146   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
147   PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;
148 
149   PetscFunctionBegin;
150 #if defined(PETSC_USE_CTABLE)
151   PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
152   for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
153 #else
154   PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
155   for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
156 #endif
157   PetscFunctionReturn(PETSC_SUCCESS);
158 }
159 
160 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
161   do { \
162     brow = row / bs; \
163     rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
164     ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
165     rmax = aimax[brow]; \
166     nrow = ailen[brow]; \
167     bcol = col / bs; \
168     ridx = row % bs; \
169     cidx = col % bs; \
170     low  = 0; \
171     high = nrow; \
172     while (high - low > 3) { \
173       t = (low + high) / 2; \
174       if (rp[t] > bcol) high = t; \
175       else low = t; \
176     } \
177     for (_i = low; _i < high; _i++) { \
178       if (rp[_i] > bcol) break; \
179       if (rp[_i] == bcol) { \
180         bap = ap + bs2 * _i + bs * cidx + ridx; \
181         if (addv == ADD_VALUES) *bap += value; \
182         else *bap = value; \
183         goto a_noinsert; \
184       } \
185     } \
186     if (a->nonew == 1) goto a_noinsert; \
187     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
188     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
189     N = nrow++ - 1; \
190     /* shift up all the later entries in this row */ \
191     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
192     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
193     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
194     rp[_i]                          = bcol; \
195     ap[bs2 * _i + bs * cidx + ridx] = value; \
196   a_noinsert:; \
197     ailen[brow] = nrow; \
198   } while (0)
199 
200 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
201   do { \
202     brow = row / bs; \
203     rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
204     ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
205     rmax = bimax[brow]; \
206     nrow = bilen[brow]; \
207     bcol = col / bs; \
208     ridx = row % bs; \
209     cidx = col % bs; \
210     low  = 0; \
211     high = nrow; \
212     while (high - low > 3) { \
213       t = (low + high) / 2; \
214       if (rp[t] > bcol) high = t; \
215       else low = t; \
216     } \
217     for (_i = low; _i < high; _i++) { \
218       if (rp[_i] > bcol) break; \
219       if (rp[_i] == bcol) { \
220         bap = ap + bs2 * _i + bs * cidx + ridx; \
221         if (addv == ADD_VALUES) *bap += value; \
222         else *bap = value; \
223         goto b_noinsert; \
224       } \
225     } \
226     if (b->nonew == 1) goto b_noinsert; \
227     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
228     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
229     N = nrow++ - 1; \
230     /* shift up all the later entries in this row */ \
231     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
232     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
233     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
234     rp[_i]                          = bcol; \
235     ap[bs2 * _i + bs * cidx + ridx] = value; \
236   b_noinsert:; \
237     bilen[brow] = nrow; \
238   } while (0)
239 
240 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
241 {
242   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
243   MatScalar    value;
244   PetscBool    roworiented = baij->roworiented;
245   PetscInt     i, j, row, col;
246   PetscInt     rstart_orig = mat->rmap->rstart;
247   PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
248   PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
249 
250   /* Some Variables required in the macro */
251   Mat          A     = baij->A;
252   Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)A->data;
253   PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
254   MatScalar   *aa = a->a;
255 
256   Mat          B     = baij->B;
257   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)B->data;
258   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
259   MatScalar   *ba = b->a;
260 
261   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
262   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
263   MatScalar *ap, *bap;
264 
265   PetscFunctionBegin;
266   for (i = 0; i < m; i++) {
267     if (im[i] < 0) continue;
268     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
269     if (im[i] >= rstart_orig && im[i] < rend_orig) {
270       row = im[i] - rstart_orig;
271       for (j = 0; j < n; j++) {
272         if (in[j] >= cstart_orig && in[j] < cend_orig) {
273           col = in[j] - cstart_orig;
274           if (roworiented) value = v[i * n + j];
275           else value = v[i + j * m];
276           MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
277         } else if (in[j] < 0) {
278           continue;
279         } else {
280           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
281           if (mat->was_assembled) {
282             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
283 #if defined(PETSC_USE_CTABLE)
284             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
285             col = col - 1;
286 #else
287             col = baij->colmap[in[j] / bs] - 1;
288 #endif
289             if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
290               PetscCall(MatDisAssemble_MPIBAIJ(mat));
291               col = in[j];
292               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
293               B     = baij->B;
294               b     = (Mat_SeqBAIJ *)B->data;
295               bimax = b->imax;
296               bi    = b->i;
297               bilen = b->ilen;
298               bj    = b->j;
299               ba    = b->a;
300             } else {
301               PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
302               col += in[j] % bs;
303             }
304           } else col = in[j];
305           if (roworiented) value = v[i * n + j];
306           else value = v[i + j * m];
307           MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
308           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
309         }
310       }
311     } else {
312       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
313       if (!baij->donotstash) {
314         mat->assembled = PETSC_FALSE;
315         if (roworiented) {
316           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
317         } else {
318           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
319         }
320       }
321     }
322   }
323   PetscFunctionReturn(PETSC_SUCCESS);
324 }
325 
326 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
327 {
328   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
329   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
330   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
331   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
332   PetscBool          roworiented = a->roworiented;
333   const PetscScalar *value       = v;
334   MatScalar         *ap, *aa = a->a, *bap;
335 
336   PetscFunctionBegin;
337   rp    = aj + ai[row];
338   ap    = aa + bs2 * ai[row];
339   rmax  = imax[row];
340   nrow  = ailen[row];
341   value = v;
342   low   = 0;
343   high  = nrow;
344   while (high - low > 7) {
345     t = (low + high) / 2;
346     if (rp[t] > col) high = t;
347     else low = t;
348   }
349   for (i = low; i < high; i++) {
350     if (rp[i] > col) break;
351     if (rp[i] == col) {
352       bap = ap + bs2 * i;
353       if (roworiented) {
354         if (is == ADD_VALUES) {
355           for (ii = 0; ii < bs; ii++) {
356             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
357           }
358         } else {
359           for (ii = 0; ii < bs; ii++) {
360             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
361           }
362         }
363       } else {
364         if (is == ADD_VALUES) {
365           for (ii = 0; ii < bs; ii++, value += bs) {
366             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
367             bap += bs;
368           }
369         } else {
370           for (ii = 0; ii < bs; ii++, value += bs) {
371             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
372             bap += bs;
373           }
374         }
375       }
376       goto noinsert2;
377     }
378   }
379   if (nonew == 1) goto noinsert2;
380   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
381   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
382   N = nrow++ - 1;
383   high++;
384   /* shift up all the later entries in this row */
385   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
386   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
387   rp[i] = col;
388   bap   = ap + bs2 * i;
389   if (roworiented) {
390     for (ii = 0; ii < bs; ii++) {
391       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
392     }
393   } else {
394     for (ii = 0; ii < bs; ii++) {
395       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
396     }
397   }
398 noinsert2:;
399   ailen[row] = nrow;
400   PetscFunctionReturn(PETSC_SUCCESS);
401 }
402 
403 /*
404     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
405     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
406 */
407 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
408 {
409   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
410   const PetscScalar *value;
411   MatScalar         *barray      = baij->barray;
412   PetscBool          roworiented = baij->roworiented;
413   PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
414   PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
415   PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
416 
417   PetscFunctionBegin;
418   if (!barray) {
419     PetscCall(PetscMalloc1(bs2, &barray));
420     baij->barray = barray;
421   }
422 
423   if (roworiented) stepval = (n - 1) * bs;
424   else stepval = (m - 1) * bs;
425 
426   for (i = 0; i < m; i++) {
427     if (im[i] < 0) continue;
428     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
429     if (im[i] >= rstart && im[i] < rend) {
430       row = im[i] - rstart;
431       for (j = 0; j < n; j++) {
432         /* If NumCol = 1 then a copy is not required */
433         if ((roworiented) && (n == 1)) {
434           barray = (MatScalar *)v + i * bs2;
435         } else if ((!roworiented) && (m == 1)) {
436           barray = (MatScalar *)v + j * bs2;
437         } else { /* Here a copy is required */
438           if (roworiented) {
439             value = v + (i * (stepval + bs) + j) * bs;
440           } else {
441             value = v + (j * (stepval + bs) + i) * bs;
442           }
443           for (ii = 0; ii < bs; ii++, value += bs + stepval) {
444             for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
445             barray += bs;
446           }
447           barray -= bs2;
448         }
449 
450         if (in[j] >= cstart && in[j] < cend) {
451           col = in[j] - cstart;
452           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
453         } else if (in[j] < 0) {
454           continue;
455         } else {
456           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
457           if (mat->was_assembled) {
458             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
459 
460 #if defined(PETSC_USE_CTABLE)
461             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
462             col = col < 1 ? -1 : (col - 1) / bs;
463 #else
464             col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
465 #endif
466             if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
467               PetscCall(MatDisAssemble_MPIBAIJ(mat));
468               col = in[j];
469             } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
470           } else col = in[j];
471           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
472         }
473       }
474     } else {
475       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
476       if (!baij->donotstash) {
477         if (roworiented) {
478           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
479         } else {
480           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
481         }
482       }
483     }
484   }
485   PetscFunctionReturn(PETSC_SUCCESS);
486 }
487 
488 #define HASH_KEY             0.6180339887
489 #define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
490 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
491 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
492 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
493 {
494   Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
495   PetscBool    roworiented = baij->roworiented;
496   PetscInt     i, j, row, col;
497   PetscInt     rstart_orig = mat->rmap->rstart;
498   PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
499   PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
500   PetscReal    tmp;
501   MatScalar  **HD       = baij->hd, value;
502   PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
503 
504   PetscFunctionBegin;
505   for (i = 0; i < m; i++) {
506     if (PetscDefined(USE_DEBUG)) {
507       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
508       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
509     }
510     row = im[i];
511     if (row >= rstart_orig && row < rend_orig) {
512       for (j = 0; j < n; j++) {
513         col = in[j];
514         if (roworiented) value = v[i * n + j];
515         else value = v[i + j * m];
516         /* Look up PetscInto the Hash Table */
517         key = (row / bs) * Nbs + (col / bs) + 1;
518         h1  = HASH(size, key, tmp);
519 
520         idx = h1;
521         if (PetscDefined(USE_DEBUG)) {
522           insert_ct++;
523           total_ct++;
524           if (HT[idx] != key) {
525             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
526             if (idx == size) {
527               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
528               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
529             }
530           }
531         } else if (HT[idx] != key) {
532           for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
533           if (idx == size) {
534             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
535             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
536           }
537         }
538         /* A HASH table entry is found, so insert the values at the correct address */
539         if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
540         else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
541       }
542     } else if (!baij->donotstash) {
543       if (roworiented) {
544         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
545       } else {
546         PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
547       }
548     }
549   }
550   if (PetscDefined(USE_DEBUG)) {
551     baij->ht_total_ct += total_ct;
552     baij->ht_insert_ct += insert_ct;
553   }
554   PetscFunctionReturn(PETSC_SUCCESS);
555 }
556 
557 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
558 {
559   Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
560   PetscBool          roworiented = baij->roworiented;
561   PetscInt           i, j, ii, jj, row, col;
562   PetscInt           rstart = baij->rstartbs;
563   PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
564   PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
565   PetscReal          tmp;
566   MatScalar        **HD = baij->hd, *baij_a;
567   const PetscScalar *v_t, *value;
568   PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
569 
570   PetscFunctionBegin;
571   if (roworiented) stepval = (n - 1) * bs;
572   else stepval = (m - 1) * bs;
573 
574   for (i = 0; i < m; i++) {
575     if (PetscDefined(USE_DEBUG)) {
576       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
577       PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
578     }
579     row = im[i];
580     v_t = v + i * nbs2;
581     if (row >= rstart && row < rend) {
582       for (j = 0; j < n; j++) {
583         col = in[j];
584 
585         /* Look up into the Hash Table */
586         key = row * Nbs + col + 1;
587         h1  = HASH(size, key, tmp);
588 
589         idx = h1;
590         if (PetscDefined(USE_DEBUG)) {
591           total_ct++;
592           insert_ct++;
593           if (HT[idx] != key) {
594             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
595             if (idx == size) {
596               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
597               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
598             }
599           }
600         } else if (HT[idx] != key) {
601           for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
602           if (idx == size) {
603             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
604             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
605           }
606         }
607         baij_a = HD[idx];
608         if (roworiented) {
609           /*value = v + i*(stepval+bs)*bs + j*bs;*/
610           /* value = v + (i*(stepval+bs)+j)*bs; */
611           value = v_t;
612           v_t += bs;
613           if (addv == ADD_VALUES) {
614             for (ii = 0; ii < bs; ii++, value += stepval) {
615               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
616             }
617           } else {
618             for (ii = 0; ii < bs; ii++, value += stepval) {
619               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
620             }
621           }
622         } else {
623           value = v + j * (stepval + bs) * bs + i * bs;
624           if (addv == ADD_VALUES) {
625             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
626               for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
627             }
628           } else {
629             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
630               for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
631             }
632           }
633         }
634       }
635     } else {
636       if (!baij->donotstash) {
637         if (roworiented) {
638           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
639         } else {
640           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
641         }
642       }
643     }
644   }
645   if (PetscDefined(USE_DEBUG)) {
646     baij->ht_total_ct += total_ct;
647     baij->ht_insert_ct += insert_ct;
648   }
649   PetscFunctionReturn(PETSC_SUCCESS);
650 }
651 
652 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
653 {
654   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
655   PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
656   PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
657 
658   PetscFunctionBegin;
659   for (i = 0; i < m; i++) {
660     if (idxm[i] < 0) continue; /* negative row */
661     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
662     PetscCheck(idxm[i] >= bsrstart && idxm[i] < bsrend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
663     row = idxm[i] - bsrstart;
664     for (j = 0; j < n; j++) {
665       if (idxn[j] < 0) continue; /* negative column */
666       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
667       if (idxn[j] >= bscstart && idxn[j] < bscend) {
668         col = idxn[j] - bscstart;
669         PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
670       } else {
671         if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
672 #if defined(PETSC_USE_CTABLE)
673         PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
674         data--;
675 #else
676         data = baij->colmap[idxn[j] / bs] - 1;
677 #endif
678         if (data < 0 || baij->garray[data / bs] != idxn[j] / bs) *(v + i * n + j) = 0.0;
679         else {
680           col = data + idxn[j] % bs;
681           PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
682         }
683       }
684     }
685   }
686   PetscFunctionReturn(PETSC_SUCCESS);
687 }
688 
689 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
690 {
691   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
692   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
693   PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
694   PetscReal    sum = 0.0;
695   MatScalar   *v;
696 
697   PetscFunctionBegin;
698   if (baij->size == 1) {
699     PetscCall(MatNorm(baij->A, type, nrm));
700   } else {
701     if (type == NORM_FROBENIUS) {
702       v  = amat->a;
703       nz = amat->nz * bs2;
704       for (i = 0; i < nz; i++) {
705         sum += PetscRealPart(PetscConj(*v) * (*v));
706         v++;
707       }
708       v  = bmat->a;
709       nz = bmat->nz * bs2;
710       for (i = 0; i < nz; i++) {
711         sum += PetscRealPart(PetscConj(*v) * (*v));
712         v++;
713       }
714       PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
715       *nrm = PetscSqrtReal(*nrm);
716     } else if (type == NORM_1) { /* max column sum */
717       Vec          col, bcol;
718       PetscScalar *array;
719       PetscInt    *jj, *garray = baij->garray;
720 
721       PetscCall(MatCreateVecs(mat, &col, NULL));
722       PetscCall(VecSet(col, 0.0));
723       PetscCall(VecGetArrayWrite(col, &array));
724       v  = amat->a;
725       jj = amat->j;
726       for (i = 0; i < amat->nz; i++) {
727         for (j = 0; j < bs; j++) {
728           PetscInt col = bs * *jj + j; /* column index */
729 
730           for (row = 0; row < bs; row++) array[col] += PetscAbsScalar(*v++);
731         }
732         jj++;
733       }
734       PetscCall(VecRestoreArrayWrite(col, &array));
735       PetscCall(MatCreateVecs(baij->B, &bcol, NULL));
736       PetscCall(VecSet(bcol, 0.0));
737       PetscCall(VecGetArrayWrite(bcol, &array));
738       v  = bmat->a;
739       jj = bmat->j;
740       for (i = 0; i < bmat->nz; i++) {
741         for (j = 0; j < bs; j++) {
742           PetscInt col = bs * *jj + j; /* column index */
743 
744           for (row = 0; row < bs; row++) array[col] += PetscAbsScalar(*v++);
745         }
746         jj++;
747       }
748       PetscCall(VecSetValuesBlocked(col, bmat->nbs, garray, array, ADD_VALUES));
749       PetscCall(VecRestoreArrayWrite(bcol, &array));
750       PetscCall(VecDestroy(&bcol));
751       PetscCall(VecAssemblyBegin(col));
752       PetscCall(VecAssemblyEnd(col));
753       PetscCall(VecNorm(col, NORM_INFINITY, nrm));
754       PetscCall(VecDestroy(&col));
755     } else if (type == NORM_INFINITY) { /* max row sum */
756       PetscReal *sums;
757       PetscCall(PetscMalloc1(bs, &sums));
758       sum = 0.0;
759       for (j = 0; j < amat->mbs; j++) {
760         for (row = 0; row < bs; row++) sums[row] = 0.0;
761         v  = amat->a + bs2 * amat->i[j];
762         nz = amat->i[j + 1] - amat->i[j];
763         for (i = 0; i < nz; i++) {
764           for (col = 0; col < bs; col++) {
765             for (row = 0; row < bs; row++) {
766               sums[row] += PetscAbsScalar(*v);
767               v++;
768             }
769           }
770         }
771         v  = bmat->a + bs2 * bmat->i[j];
772         nz = bmat->i[j + 1] - bmat->i[j];
773         for (i = 0; i < nz; i++) {
774           for (col = 0; col < bs; col++) {
775             for (row = 0; row < bs; row++) {
776               sums[row] += PetscAbsScalar(*v);
777               v++;
778             }
779           }
780         }
781         for (row = 0; row < bs; row++) {
782           if (sums[row] > sum) sum = sums[row];
783         }
784       }
785       PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
786       PetscCall(PetscFree(sums));
787     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
788   }
789   PetscFunctionReturn(PETSC_SUCCESS);
790 }
791 
792 /*
793   Creates the hash table, and sets the table
794   This table is created only once.
795   If new entries need to be added to the matrix
796   then the hash table has to be destroyed and
797   recreated.
798 */
799 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
800 {
801   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
802   Mat          A = baij->A, B = baij->B;
803   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
804   PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
805   PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
806   PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
807   PetscInt    *HT, key;
808   MatScalar  **HD;
809   PetscReal    tmp;
810 #if defined(PETSC_USE_INFO)
811   PetscInt ct = 0, max = 0;
812 #endif
813 
814   PetscFunctionBegin;
815   if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);
816 
817   baij->ht_size = (PetscInt)(factor * nz);
818   ht_size       = baij->ht_size;
819 
820   /* Allocate Memory for Hash Table */
821   PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
822   HD = baij->hd;
823   HT = baij->ht;
824 
825   /* Loop Over A */
826   for (i = 0; i < a->mbs; i++) {
827     for (j = ai[i]; j < ai[i + 1]; j++) {
828       row = i + rstart;
829       col = aj[j] + cstart;
830 
831       key = row * Nbs + col + 1;
832       h1  = HASH(ht_size, key, tmp);
833       for (k = 0; k < ht_size; k++) {
834         if (!HT[(h1 + k) % ht_size]) {
835           HT[(h1 + k) % ht_size] = key;
836           HD[(h1 + k) % ht_size] = a->a + j * bs2;
837           break;
838 #if defined(PETSC_USE_INFO)
839         } else {
840           ct++;
841 #endif
842         }
843       }
844 #if defined(PETSC_USE_INFO)
845       if (k > max) max = k;
846 #endif
847     }
848   }
849   /* Loop Over B */
850   for (i = 0; i < b->mbs; i++) {
851     for (j = bi[i]; j < bi[i + 1]; j++) {
852       row = i + rstart;
853       col = garray[bj[j]];
854       key = row * Nbs + col + 1;
855       h1  = HASH(ht_size, key, tmp);
856       for (k = 0; k < ht_size; k++) {
857         if (!HT[(h1 + k) % ht_size]) {
858           HT[(h1 + k) % ht_size] = key;
859           HD[(h1 + k) % ht_size] = b->a + j * bs2;
860           break;
861 #if defined(PETSC_USE_INFO)
862         } else {
863           ct++;
864 #endif
865         }
866       }
867 #if defined(PETSC_USE_INFO)
868       if (k > max) max = k;
869 #endif
870     }
871   }
872 
873   /* Print Summary */
874 #if defined(PETSC_USE_INFO)
875   for (i = 0, j = 0; i < ht_size; i++) {
876     if (HT[i]) j++;
877   }
878   PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? 0.0 : (double)(((PetscReal)(ct + j)) / j), max));
879 #endif
880   PetscFunctionReturn(PETSC_SUCCESS);
881 }
882 
883 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
884 {
885   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
886   PetscInt     nstash, reallocs;
887 
888   PetscFunctionBegin;
889   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
890 
891   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
892   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
893   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
894   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
895   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
896   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
897   PetscFunctionReturn(PETSC_SUCCESS);
898 }
899 
900 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
901 {
902   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
903   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
904   PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
905   PetscInt    *row, *col;
906   PetscBool    r1, r2, r3, all_assembled;
907   MatScalar   *val;
908   PetscMPIInt  n;
909 
910   PetscFunctionBegin;
911   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
912   if (!baij->donotstash && !mat->nooffprocentries) {
913     while (1) {
914       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
915       if (!flg) break;
916 
917       for (i = 0; i < n;) {
918         /* Now identify the consecutive vals belonging to the same row */
919         for (j = i, rstart = row[j]; j < n; j++) {
920           if (row[j] != rstart) break;
921         }
922         if (j < n) ncols = j - i;
923         else ncols = n - i;
924         /* Now assemble all these values with a single function call */
925         PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
926         i = j;
927       }
928     }
929     PetscCall(MatStashScatterEnd_Private(&mat->stash));
930     /* Now process the block-stash. Since the values are stashed column-oriented,
931        set the row-oriented flag to column-oriented, and after MatSetValues()
932        restore the original flags */
933     r1 = baij->roworiented;
934     r2 = a->roworiented;
935     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
936 
937     baij->roworiented                           = PETSC_FALSE;
938     a->roworiented                              = PETSC_FALSE;
939     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
940     while (1) {
941       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
942       if (!flg) break;
943 
944       for (i = 0; i < n;) {
945         /* Now identify the consecutive vals belonging to the same row */
946         for (j = i, rstart = row[j]; j < n; j++) {
947           if (row[j] != rstart) break;
948         }
949         if (j < n) ncols = j - i;
950         else ncols = n - i;
951         PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
952         i = j;
953       }
954     }
955     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
956 
957     baij->roworiented                           = r1;
958     a->roworiented                              = r2;
959     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
960   }
961 
962   PetscCall(MatAssemblyBegin(baij->A, mode));
963   PetscCall(MatAssemblyEnd(baij->A, mode));
964 
965   /* determine if any process has disassembled, if so we must
966      also disassemble ourselves, in order that we may reassemble. */
967   /*
968      if nonzero structure of submatrix B cannot change then we know that
969      no process disassembled thus we can skip this stuff
970   */
971   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
972     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
973     if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
974   }
975 
976   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
977   PetscCall(MatAssemblyBegin(baij->B, mode));
978   PetscCall(MatAssemblyEnd(baij->B, mode));
979 
980 #if defined(PETSC_USE_INFO)
981   if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
982     PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));
983 
984     baij->ht_total_ct  = 0;
985     baij->ht_insert_ct = 0;
986   }
987 #endif
988   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
989     PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));
990 
991     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
992     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
993   }
994 
995   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
996 
997   baij->rowvalues = NULL;
998 
999   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
1000   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
1001     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
1002     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
1003   }
1004   PetscFunctionReturn(PETSC_SUCCESS);
1005 }
1006 
1007 #include <petscdraw.h>
1008 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1009 {
1010   Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
1011   PetscMPIInt       rank = baij->rank;
1012   PetscInt          bs   = mat->rmap->bs;
1013   PetscBool         isascii, isdraw;
1014   PetscViewer       sviewer;
1015   PetscViewerFormat format;
1016 
1017   PetscFunctionBegin;
1018   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1019   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1020   if (isascii) {
1021     PetscCall(PetscViewerGetFormat(viewer, &format));
1022     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1023       MatInfo info;
1024       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1025       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1026       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1027       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1028                                                    mat->rmap->bs, info.memory));
1029       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
1030       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1031       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
1032       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1033       PetscCall(PetscViewerFlush(viewer));
1034       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1035       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1036       PetscCall(VecScatterView(baij->Mvctx, viewer));
1037       PetscFunctionReturn(PETSC_SUCCESS);
1038     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1039       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1040       PetscFunctionReturn(PETSC_SUCCESS);
1041     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1042       PetscFunctionReturn(PETSC_SUCCESS);
1043     }
1044   }
1045 
1046   if (isdraw) {
1047     PetscDraw draw;
1048     PetscBool isnull;
1049     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1050     PetscCall(PetscDrawIsNull(draw, &isnull));
1051     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1052   }
1053 
1054   {
1055     /* assemble the entire matrix onto first processor. */
1056     Mat          A;
1057     Mat_SeqBAIJ *Aloc;
1058     PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
1059     MatScalar   *a;
1060     const char  *matname;
1061 
1062     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1063     /* Perhaps this should be the type of mat? */
1064     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
1065     if (rank == 0) {
1066       PetscCall(MatSetSizes(A, M, N, M, N));
1067     } else {
1068       PetscCall(MatSetSizes(A, 0, 0, M, N));
1069     }
1070     PetscCall(MatSetType(A, MATMPIBAIJ));
1071     PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
1072     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
1073 
1074     /* copy over the A part */
1075     Aloc = (Mat_SeqBAIJ *)baij->A->data;
1076     ai   = Aloc->i;
1077     aj   = Aloc->j;
1078     a    = Aloc->a;
1079     PetscCall(PetscMalloc1(bs, &rvals));
1080 
1081     for (i = 0; i < mbs; i++) {
1082       rvals[0] = bs * (baij->rstartbs + i);
1083       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1084       for (j = ai[i]; j < ai[i + 1]; j++) {
1085         col = (baij->cstartbs + aj[j]) * bs;
1086         for (k = 0; k < bs; k++) {
1087           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1088           col++;
1089           a += bs;
1090         }
1091       }
1092     }
1093     /* copy over the B part */
1094     Aloc = (Mat_SeqBAIJ *)baij->B->data;
1095     ai   = Aloc->i;
1096     aj   = Aloc->j;
1097     a    = Aloc->a;
1098     for (i = 0; i < mbs; i++) {
1099       rvals[0] = bs * (baij->rstartbs + i);
1100       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1101       for (j = ai[i]; j < ai[i + 1]; j++) {
1102         col = baij->garray[aj[j]] * bs;
1103         for (k = 0; k < bs; k++) {
1104           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
1105           col++;
1106           a += bs;
1107         }
1108       }
1109     }
1110     PetscCall(PetscFree(rvals));
1111     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1112     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1113     /*
1114        Everyone has to call to draw the matrix since the graphics waits are
1115        synchronized across all processors that share the PetscDraw object
1116     */
1117     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1118     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1119     if (rank == 0) {
1120       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
1121       PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
1122     }
1123     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1124     PetscCall(MatDestroy(&A));
1125   }
1126   PetscFunctionReturn(PETSC_SUCCESS);
1127 }
1128 
1129 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1130 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
1131 {
1132   Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
1133   Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
1134   Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
1135   const PetscInt *garray = aij->garray;
1136   PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
1137   PetscCount      nz, hnz;
1138   PetscInt       *rowlens, *colidxs;
1139   PetscScalar    *matvals;
1140   PetscMPIInt     rank;
1141 
1142   PetscFunctionBegin;
1143   PetscCall(PetscViewerSetUp(viewer));
1144 
1145   M  = mat->rmap->N;
1146   N  = mat->cmap->N;
1147   m  = mat->rmap->n;
1148   rs = mat->rmap->rstart;
1149   cs = mat->cmap->rstart;
1150   bs = mat->rmap->bs;
1151   nz = bs * bs * (A->nz + B->nz);
1152 
1153   /* write matrix header */
1154   header[0] = MAT_FILE_CLASSID;
1155   header[1] = M;
1156   header[2] = N;
1157   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1158   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1159   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1160   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1161 
1162   /* fill in and store row lengths */
1163   PetscCall(PetscMalloc1(m, &rowlens));
1164   for (cnt = 0, i = 0; i < A->mbs; i++)
1165     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
1166   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1167   PetscCall(PetscFree(rowlens));
1168 
1169   /* fill in and store column indices */
1170   PetscCall(PetscMalloc1(nz, &colidxs));
1171   for (cnt = 0, i = 0; i < A->mbs; i++) {
1172     for (k = 0; k < bs; k++) {
1173       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1174         if (garray[B->j[jb]] > cs / bs) break;
1175         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1176       }
1177       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1178         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
1179       for (; jb < B->i[i + 1]; jb++)
1180         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1181     }
1182   }
1183   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz);
1184   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
1185   PetscCall(PetscFree(colidxs));
1186 
1187   /* fill in and store nonzero values */
1188   PetscCall(PetscMalloc1(nz, &matvals));
1189   for (cnt = 0, i = 0; i < A->mbs; i++) {
1190     for (k = 0; k < bs; k++) {
1191       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1192         if (garray[B->j[jb]] > cs / bs) break;
1193         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1194       }
1195       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
1196         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
1197       for (; jb < B->i[i + 1]; jb++)
1198         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1199     }
1200   }
1201   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
1202   PetscCall(PetscFree(matvals));
1203 
1204   /* write block size option to the viewer's .info file */
1205   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1206   PetscFunctionReturn(PETSC_SUCCESS);
1207 }
1208 
1209 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
1210 {
1211   PetscBool isascii, isdraw, issocket, isbinary;
1212 
1213   PetscFunctionBegin;
1214   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1215   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1216   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1217   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1218   if (isascii || isdraw || issocket) PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
1219   else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
1220   PetscFunctionReturn(PETSC_SUCCESS);
1221 }
1222 
1223 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
1224 {
1225   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1226   PetscInt     nt;
1227 
1228   PetscFunctionBegin;
1229   PetscCall(VecGetLocalSize(xx, &nt));
1230   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
1231   PetscCall(VecGetLocalSize(yy, &nt));
1232   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
1233   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1234   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
1235   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1236   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
1237   PetscFunctionReturn(PETSC_SUCCESS);
1238 }
1239 
1240 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1241 {
1242   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1243 
1244   PetscFunctionBegin;
1245   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1246   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1247   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1248   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1249   PetscFunctionReturn(PETSC_SUCCESS);
1250 }
1251 
1252 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
1253 {
1254   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1255 
1256   PetscFunctionBegin;
1257   /* do nondiagonal part */
1258   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1259   /* do local part */
1260   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1261   /* add partial results together */
1262   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1263   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1264   PetscFunctionReturn(PETSC_SUCCESS);
1265 }
1266 
1267 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1268 {
1269   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1270 
1271   PetscFunctionBegin;
1272   /* do nondiagonal part */
1273   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1274   /* do local part */
1275   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1276   /* add partial results together */
1277   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1278   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1279   PetscFunctionReturn(PETSC_SUCCESS);
1280 }
1281 
1282 /*
1283   This only works correctly for square matrices where the subblock A->A is the
1284    diagonal block
1285 */
1286 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
1287 {
1288   PetscFunctionBegin;
1289   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1290   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
1291   PetscFunctionReturn(PETSC_SUCCESS);
1292 }
1293 
1294 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
1295 {
1296   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1297 
1298   PetscFunctionBegin;
1299   PetscCall(MatScale(a->A, aa));
1300   PetscCall(MatScale(a->B, aa));
1301   PetscFunctionReturn(PETSC_SUCCESS);
1302 }
1303 
1304 static PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1305 {
1306   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
1307   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1308   PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1309   PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1310   PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;
1311 
1312   PetscFunctionBegin;
1313   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1314   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1315   mat->getrowactive = PETSC_TRUE;
1316 
1317   if (!mat->rowvalues && (idx || v)) {
1318     /*
1319         allocate enough space to hold information from the longest row.
1320     */
1321     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
1322     PetscInt     max = 1, mbs = mat->mbs, tmp;
1323     for (i = 0; i < mbs; i++) {
1324       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1325       if (max < tmp) max = tmp;
1326     }
1327     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1328   }
1329   lrow = row - brstart;
1330 
1331   pvA = &vworkA;
1332   pcA = &cworkA;
1333   pvB = &vworkB;
1334   pcB = &cworkB;
1335   if (!v) {
1336     pvA = NULL;
1337     pvB = NULL;
1338   }
1339   if (!idx) {
1340     pcA = NULL;
1341     if (!v) pcB = NULL;
1342   }
1343   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1344   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1345   nztot = nzA + nzB;
1346 
1347   cmap = mat->garray;
1348   if (v || idx) {
1349     if (nztot) {
1350       /* Sort by increasing column numbers, assuming A and B already sorted */
1351       PetscInt imark = -1;
1352       if (v) {
1353         *v = v_p = mat->rowvalues;
1354         for (i = 0; i < nzB; i++) {
1355           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1356           else break;
1357         }
1358         imark = i;
1359         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1360         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1361       }
1362       if (idx) {
1363         *idx = idx_p = mat->rowindices;
1364         if (imark > -1) {
1365           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1366         } else {
1367           for (i = 0; i < nzB; i++) {
1368             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1369             else break;
1370           }
1371           imark = i;
1372         }
1373         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1374         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1375       }
1376     } else {
1377       if (idx) *idx = NULL;
1378       if (v) *v = NULL;
1379     }
1380   }
1381   *nz = nztot;
1382   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1383   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1384   PetscFunctionReturn(PETSC_SUCCESS);
1385 }
1386 
1387 static PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1388 {
1389   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1390 
1391   PetscFunctionBegin;
1392   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1393   baij->getrowactive = PETSC_FALSE;
1394   PetscFunctionReturn(PETSC_SUCCESS);
1395 }
1396 
1397 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1398 {
1399   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1400 
1401   PetscFunctionBegin;
1402   PetscCall(MatZeroEntries(l->A));
1403   PetscCall(MatZeroEntries(l->B));
1404   PetscFunctionReturn(PETSC_SUCCESS);
1405 }
1406 
1407 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1408 {
1409   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
1410   Mat            A = a->A, B = a->B;
1411   PetscLogDouble isend[5], irecv[5];
1412 
1413   PetscFunctionBegin;
1414   info->block_size = (PetscReal)matin->rmap->bs;
1415 
1416   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1417 
1418   isend[0] = info->nz_used;
1419   isend[1] = info->nz_allocated;
1420   isend[2] = info->nz_unneeded;
1421   isend[3] = info->memory;
1422   isend[4] = info->mallocs;
1423 
1424   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1425 
1426   isend[0] += info->nz_used;
1427   isend[1] += info->nz_allocated;
1428   isend[2] += info->nz_unneeded;
1429   isend[3] += info->memory;
1430   isend[4] += info->mallocs;
1431 
1432   if (flag == MAT_LOCAL) {
1433     info->nz_used      = isend[0];
1434     info->nz_allocated = isend[1];
1435     info->nz_unneeded  = isend[2];
1436     info->memory       = isend[3];
1437     info->mallocs      = isend[4];
1438   } else if (flag == MAT_GLOBAL_MAX) {
1439     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1440 
1441     info->nz_used      = irecv[0];
1442     info->nz_allocated = irecv[1];
1443     info->nz_unneeded  = irecv[2];
1444     info->memory       = irecv[3];
1445     info->mallocs      = irecv[4];
1446   } else if (flag == MAT_GLOBAL_SUM) {
1447     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1448 
1449     info->nz_used      = irecv[0];
1450     info->nz_allocated = irecv[1];
1451     info->nz_unneeded  = irecv[2];
1452     info->memory       = irecv[3];
1453     info->mallocs      = irecv[4];
1454   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1455   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1456   info->fill_ratio_needed = 0;
1457   info->factor_mallocs    = 0;
1458   PetscFunctionReturn(PETSC_SUCCESS);
1459 }
1460 
1461 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
1462 {
1463   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1464 
1465   PetscFunctionBegin;
1466   switch (op) {
1467   case MAT_NEW_NONZERO_LOCATIONS:
1468   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1469   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1470   case MAT_KEEP_NONZERO_PATTERN:
1471   case MAT_NEW_NONZERO_LOCATION_ERR:
1472     MatCheckPreallocated(A, 1);
1473     PetscCall(MatSetOption(a->A, op, flg));
1474     PetscCall(MatSetOption(a->B, op, flg));
1475     break;
1476   case MAT_ROW_ORIENTED:
1477     MatCheckPreallocated(A, 1);
1478     a->roworiented = flg;
1479 
1480     PetscCall(MatSetOption(a->A, op, flg));
1481     PetscCall(MatSetOption(a->B, op, flg));
1482     break;
1483   case MAT_IGNORE_OFF_PROC_ENTRIES:
1484     a->donotstash = flg;
1485     break;
1486   case MAT_USE_HASH_TABLE:
1487     a->ht_flag = flg;
1488     a->ht_fact = 1.39;
1489     break;
1490   case MAT_SYMMETRIC:
1491   case MAT_STRUCTURALLY_SYMMETRIC:
1492   case MAT_HERMITIAN:
1493   case MAT_SYMMETRY_ETERNAL:
1494   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1495   case MAT_SPD_ETERNAL:
1496     /* if the diagonal matrix is square it inherits some of the properties above */
1497     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1498     break;
1499   default:
1500     break;
1501   }
1502   PetscFunctionReturn(PETSC_SUCCESS);
1503 }
1504 
1505 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
1506 {
1507   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
1508   Mat_SeqBAIJ *Aloc;
1509   Mat          B;
1510   PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
1511   PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
1512   MatScalar   *a;
1513 
1514   PetscFunctionBegin;
1515   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1516   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1517     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1518     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1519     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1520     /* Do not know preallocation information, but must set block size */
1521     PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
1522   } else {
1523     B = *matout;
1524   }
1525 
1526   /* copy over the A part */
1527   Aloc = (Mat_SeqBAIJ *)baij->A->data;
1528   ai   = Aloc->i;
1529   aj   = Aloc->j;
1530   a    = Aloc->a;
1531   PetscCall(PetscMalloc1(bs, &rvals));
1532 
1533   for (i = 0; i < mbs; i++) {
1534     rvals[0] = bs * (baij->rstartbs + i);
1535     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1536     for (j = ai[i]; j < ai[i + 1]; j++) {
1537       col = (baij->cstartbs + aj[j]) * bs;
1538       for (k = 0; k < bs; k++) {
1539         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1540 
1541         col++;
1542         a += bs;
1543       }
1544     }
1545   }
1546   /* copy over the B part */
1547   Aloc = (Mat_SeqBAIJ *)baij->B->data;
1548   ai   = Aloc->i;
1549   aj   = Aloc->j;
1550   a    = Aloc->a;
1551   for (i = 0; i < mbs; i++) {
1552     rvals[0] = bs * (baij->rstartbs + i);
1553     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
1554     for (j = ai[i]; j < ai[i + 1]; j++) {
1555       col = baij->garray[aj[j]] * bs;
1556       for (k = 0; k < bs; k++) {
1557         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
1558         col++;
1559         a += bs;
1560       }
1561     }
1562   }
1563   PetscCall(PetscFree(rvals));
1564   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1565   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1566 
1567   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1568   else PetscCall(MatHeaderMerge(A, &B));
1569   PetscFunctionReturn(PETSC_SUCCESS);
1570 }
1571 
1572 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
1573 {
1574   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1575   Mat          a = baij->A, b = baij->B;
1576   PetscInt     s1, s2, s3;
1577 
1578   PetscFunctionBegin;
1579   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1580   if (rr) {
1581     PetscCall(VecGetLocalSize(rr, &s1));
1582     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1583     /* Overlap communication with computation. */
1584     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1585   }
1586   if (ll) {
1587     PetscCall(VecGetLocalSize(ll, &s1));
1588     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1589     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1590   }
1591   /* scale  the diagonal block */
1592   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1593 
1594   if (rr) {
1595     /* Do a scatter end and then right scale the off-diagonal block */
1596     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1597     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1598   }
1599   PetscFunctionReturn(PETSC_SUCCESS);
1600 }
1601 
1602 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1603 {
1604   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1605   PetscInt    *lrows;
1606   PetscInt     r, len;
1607   PetscBool    cong;
1608 
1609   PetscFunctionBegin;
1610   /* get locally owned rows */
1611   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
1612   /* fix right-hand side if needed */
1613   if (x && b) {
1614     const PetscScalar *xx;
1615     PetscScalar       *bb;
1616 
1617     PetscCall(VecGetArrayRead(x, &xx));
1618     PetscCall(VecGetArray(b, &bb));
1619     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
1620     PetscCall(VecRestoreArrayRead(x, &xx));
1621     PetscCall(VecRestoreArray(b, &bb));
1622   }
1623 
1624   /* actually zap the local rows */
1625   /*
1626         Zero the required rows. If the "diagonal block" of the matrix
1627      is square and the user wishes to set the diagonal we use separate
1628      code so that MatSetValues() is not called for each diagonal allocating
1629      new memory, thus calling lots of mallocs and slowing things down.
1630 
1631   */
1632   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1633   PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
1634   PetscCall(MatHasCongruentLayouts(A, &cong));
1635   if ((diag != 0.0) && cong) {
1636     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
1637   } else if (diag != 0.0) {
1638     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1639     PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR");
1640     for (r = 0; r < len; ++r) {
1641       const PetscInt row = lrows[r] + A->rmap->rstart;
1642       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
1643     }
1644     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1645     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1646   } else {
1647     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1648   }
1649   PetscCall(PetscFree(lrows));
1650 
1651   /* only change matrix nonzero state if pattern was allowed to be changed */
1652   if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) {
1653     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1654     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1655   }
1656   PetscFunctionReturn(PETSC_SUCCESS);
1657 }
1658 
1659 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
1660 {
1661   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
1662   PetscMPIInt        n, p = 0;
1663   PetscInt           i, j, k, r, len = 0, row, col, count;
1664   PetscInt          *lrows, *owners = A->rmap->range;
1665   PetscSFNode       *rrows;
1666   PetscSF            sf;
1667   const PetscScalar *xx;
1668   PetscScalar       *bb, *mask;
1669   Vec                xmask, lmask;
1670   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
1671   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1672   PetscScalar       *aa;
1673 
1674   PetscFunctionBegin;
1675   PetscCall(PetscMPIIntCast(A->rmap->n, &n));
1676   /* Create SF where leaves are input rows and roots are owned rows */
1677   PetscCall(PetscMalloc1(n, &lrows));
1678   for (r = 0; r < n; ++r) lrows[r] = -1;
1679   PetscCall(PetscMalloc1(N, &rrows));
1680   for (r = 0; r < N; ++r) {
1681     const PetscInt idx = rows[r];
1682     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
1683     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
1684       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
1685     }
1686     rrows[r].rank  = p;
1687     rrows[r].index = rows[r] - owners[p];
1688   }
1689   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1690   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
1691   /* Collect flags for rows to be zeroed */
1692   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1693   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
1694   PetscCall(PetscSFDestroy(&sf));
1695   /* Compress and put in row numbers */
1696   for (r = 0; r < n; ++r)
1697     if (lrows[r] >= 0) lrows[len++] = r;
1698   /* zero diagonal part of matrix */
1699   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
1700   /* handle off-diagonal part of matrix */
1701   PetscCall(MatCreateVecs(A, &xmask, NULL));
1702   PetscCall(VecDuplicate(l->lvec, &lmask));
1703   PetscCall(VecGetArray(xmask, &bb));
1704   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
1705   PetscCall(VecRestoreArray(xmask, &bb));
1706   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1707   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
1708   PetscCall(VecDestroy(&xmask));
1709   if (x) {
1710     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1711     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
1712     PetscCall(VecGetArrayRead(l->lvec, &xx));
1713     PetscCall(VecGetArray(b, &bb));
1714   }
1715   PetscCall(VecGetArray(lmask, &mask));
1716   /* remove zeroed rows of off-diagonal matrix */
1717   for (i = 0; i < len; ++i) {
1718     row   = lrows[i];
1719     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
1720     aa    = baij->a + baij->i[row / bs] * bs2 + (row % bs);
1721     for (k = 0; k < count; ++k) {
1722       aa[0] = 0.0;
1723       aa += bs;
1724     }
1725   }
1726   /* loop over all elements of off process part of matrix zeroing removed columns*/
1727   for (i = 0; i < l->B->rmap->N; ++i) {
1728     row = i / bs;
1729     for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
1730       for (k = 0; k < bs; ++k) {
1731         col = bs * baij->j[j] + k;
1732         if (PetscAbsScalar(mask[col])) {
1733           aa = baij->a + j * bs2 + (i % bs) + bs * k;
1734           if (x) bb[i] -= aa[0] * xx[col];
1735           aa[0] = 0.0;
1736         }
1737       }
1738     }
1739   }
1740   if (x) {
1741     PetscCall(VecRestoreArray(b, &bb));
1742     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1743   }
1744   PetscCall(VecRestoreArray(lmask, &mask));
1745   PetscCall(VecDestroy(&lmask));
1746   PetscCall(PetscFree(lrows));
1747 
1748   /* only change matrix nonzero state if pattern was allowed to be changed */
1749   if (!((Mat_SeqBAIJ *)l->A->data)->nonew) {
1750     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1751     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1752   }
1753   PetscFunctionReturn(PETSC_SUCCESS);
1754 }
1755 
1756 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1757 {
1758   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1759 
1760   PetscFunctionBegin;
1761   PetscCall(MatSetUnfactored(a->A));
1762   PetscFunctionReturn(PETSC_SUCCESS);
1763 }
1764 
1765 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
1766 
1767 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
1768 {
1769   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
1770   Mat          a, b, c, d;
1771   PetscBool    flg;
1772 
1773   PetscFunctionBegin;
1774   a = matA->A;
1775   b = matA->B;
1776   c = matB->A;
1777   d = matB->B;
1778 
1779   PetscCall(MatEqual(a, c, &flg));
1780   if (flg) PetscCall(MatEqual(b, d, &flg));
1781   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1782   PetscFunctionReturn(PETSC_SUCCESS);
1783 }
1784 
1785 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
1786 {
1787   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1788   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1789 
1790   PetscFunctionBegin;
1791   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1792   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1793     PetscCall(MatCopy_Basic(A, B, str));
1794   } else {
1795     PetscCall(MatCopy(a->A, b->A, str));
1796     PetscCall(MatCopy(a->B, b->B, str));
1797   }
1798   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1799   PetscFunctionReturn(PETSC_SUCCESS);
1800 }
1801 
1802 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
1803 {
1804   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
1805   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
1806   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
1807 
1808   PetscFunctionBegin;
1809   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
1810   PetscFunctionReturn(PETSC_SUCCESS);
1811 }
1812 
1813 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1814 {
1815   Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
1816   PetscBLASInt bnz, one                         = 1;
1817   Mat_SeqBAIJ *x, *y;
1818   PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;
1819 
1820   PetscFunctionBegin;
1821   if (str == SAME_NONZERO_PATTERN) {
1822     PetscScalar alpha = a;
1823     x                 = (Mat_SeqBAIJ *)xx->A->data;
1824     y                 = (Mat_SeqBAIJ *)yy->A->data;
1825     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1826     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1827     x = (Mat_SeqBAIJ *)xx->B->data;
1828     y = (Mat_SeqBAIJ *)yy->B->data;
1829     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1830     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
1831     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1832   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1833     PetscCall(MatAXPY_Basic(Y, a, X, str));
1834   } else {
1835     Mat       B;
1836     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1837     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1838     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1839     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1840     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1841     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1842     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1843     PetscCall(MatSetType(B, MATMPIBAIJ));
1844     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
1845     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1846     PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1847     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
1848     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1849     PetscCall(MatHeaderMerge(Y, &B));
1850     PetscCall(PetscFree(nnz_d));
1851     PetscCall(PetscFree(nnz_o));
1852   }
1853   PetscFunctionReturn(PETSC_SUCCESS);
1854 }
1855 
1856 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1857 {
1858   PetscFunctionBegin;
1859   if (PetscDefined(USE_COMPLEX)) {
1860     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1861 
1862     PetscCall(MatConjugate_SeqBAIJ(a->A));
1863     PetscCall(MatConjugate_SeqBAIJ(a->B));
1864   }
1865   PetscFunctionReturn(PETSC_SUCCESS);
1866 }
1867 
1868 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1869 {
1870   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1871 
1872   PetscFunctionBegin;
1873   PetscCall(MatRealPart(a->A));
1874   PetscCall(MatRealPart(a->B));
1875   PetscFunctionReturn(PETSC_SUCCESS);
1876 }
1877 
1878 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1879 {
1880   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1881 
1882   PetscFunctionBegin;
1883   PetscCall(MatImaginaryPart(a->A));
1884   PetscCall(MatImaginaryPart(a->B));
1885   PetscFunctionReturn(PETSC_SUCCESS);
1886 }
1887 
1888 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1889 {
1890   IS       iscol_local;
1891   PetscInt csize;
1892 
1893   PetscFunctionBegin;
1894   PetscCall(ISGetLocalSize(iscol, &csize));
1895   if (call == MAT_REUSE_MATRIX) {
1896     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1897     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1898   } else {
1899     PetscCall(ISAllGather(iscol, &iscol_local));
1900   }
1901   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
1902   if (call == MAT_INITIAL_MATRIX) {
1903     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1904     PetscCall(ISDestroy(&iscol_local));
1905   }
1906   PetscFunctionReturn(PETSC_SUCCESS);
1907 }
1908 
1909 /*
1910   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1911   in local and then by concatenating the local matrices the end result.
1912   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1913   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1914 */
1915 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
1916 {
1917   PetscMPIInt  rank, size;
1918   PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
1919   PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
1920   Mat          M, Mreuse;
1921   MatScalar   *vwork, *aa;
1922   MPI_Comm     comm;
1923   IS           isrow_new, iscol_new;
1924   Mat_SeqBAIJ *aij;
1925 
1926   PetscFunctionBegin;
1927   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
1928   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1929   PetscCallMPI(MPI_Comm_size(comm, &size));
1930   /* The compression and expansion should be avoided. Doesn't point
1931      out errors, might change the indices, hence buggey */
1932   PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
1933   if (isrow == iscol) {
1934     iscol_new = isrow_new;
1935     PetscCall(PetscObjectReference((PetscObject)iscol_new));
1936   } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));
1937 
1938   if (call == MAT_REUSE_MATRIX) {
1939     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
1940     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1941     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
1942   } else {
1943     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
1944   }
1945   PetscCall(ISDestroy(&isrow_new));
1946   PetscCall(ISDestroy(&iscol_new));
1947   /*
1948       m - number of local rows
1949       n - number of columns (same on all processors)
1950       rstart - first row in new global matrix generated
1951   */
1952   PetscCall(MatGetBlockSize(mat, &bs));
1953   PetscCall(MatGetSize(Mreuse, &m, &n));
1954   m = m / bs;
1955   n = n / bs;
1956 
1957   if (call == MAT_INITIAL_MATRIX) {
1958     aij = (Mat_SeqBAIJ *)Mreuse->data;
1959     ii  = aij->i;
1960     jj  = aij->j;
1961 
1962     /*
1963         Determine the number of non-zeros in the diagonal and off-diagonal
1964         portions of the matrix in order to do correct preallocation
1965     */
1966 
1967     /* first get start and end of "diagonal" columns */
1968     if (csize == PETSC_DECIDE) {
1969       PetscCall(ISGetSize(isrow, &mglobal));
1970       if (mglobal == n * bs) { /* square matrix */
1971         nlocal = m;
1972       } else {
1973         nlocal = n / size + ((n % size) > rank);
1974       }
1975     } else {
1976       nlocal = csize / bs;
1977     }
1978     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
1979     rstart = rend - nlocal;
1980     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
1981 
1982     /* next, compute all the lengths */
1983     PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
1984     for (i = 0; i < m; i++) {
1985       jend = ii[i + 1] - ii[i];
1986       olen = 0;
1987       dlen = 0;
1988       for (j = 0; j < jend; j++) {
1989         if (*jj < rstart || *jj >= rend) olen++;
1990         else dlen++;
1991         jj++;
1992       }
1993       olens[i] = olen;
1994       dlens[i] = dlen;
1995     }
1996     PetscCall(MatCreate(comm, &M));
1997     PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
1998     PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
1999     PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2000     PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2001     PetscCall(PetscFree2(dlens, olens));
2002   } else {
2003     PetscInt ml, nl;
2004 
2005     M = *newmat;
2006     PetscCall(MatGetLocalSize(M, &ml, &nl));
2007     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
2008     PetscCall(MatZeroEntries(M));
2009     /*
2010          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2011        rather than the slower MatSetValues().
2012     */
2013     M->was_assembled = PETSC_TRUE;
2014     M->assembled     = PETSC_FALSE;
2015   }
2016   PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
2017   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
2018   aij = (Mat_SeqBAIJ *)Mreuse->data;
2019   ii  = aij->i;
2020   jj  = aij->j;
2021   aa  = aij->a;
2022   for (i = 0; i < m; i++) {
2023     row   = rstart / bs + i;
2024     nz    = ii[i + 1] - ii[i];
2025     cwork = jj;
2026     jj    = PetscSafePointerPlusOffset(jj, nz);
2027     vwork = aa;
2028     aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
2029     PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
2030   }
2031 
2032   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
2033   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
2034   *newmat = M;
2035 
2036   /* save submatrix used in processor for next request */
2037   if (call == MAT_INITIAL_MATRIX) {
2038     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
2039     PetscCall(PetscObjectDereference((PetscObject)Mreuse));
2040   }
2041   PetscFunctionReturn(PETSC_SUCCESS);
2042 }
2043 
2044 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
2045 {
2046   MPI_Comm        comm, pcomm;
2047   PetscInt        clocal_size, nrows;
2048   const PetscInt *rows;
2049   PetscMPIInt     size;
2050   IS              crowp, lcolp;
2051 
2052   PetscFunctionBegin;
2053   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
2054   /* make a collective version of 'rowp' */
2055   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
2056   if (pcomm == comm) {
2057     crowp = rowp;
2058   } else {
2059     PetscCall(ISGetSize(rowp, &nrows));
2060     PetscCall(ISGetIndices(rowp, &rows));
2061     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
2062     PetscCall(ISRestoreIndices(rowp, &rows));
2063   }
2064   PetscCall(ISSetPermutation(crowp));
2065   /* make a local version of 'colp' */
2066   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
2067   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2068   if (size == 1) {
2069     lcolp = colp;
2070   } else {
2071     PetscCall(ISAllGather(colp, &lcolp));
2072   }
2073   PetscCall(ISSetPermutation(lcolp));
2074   /* now we just get the submatrix */
2075   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
2076   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
2077   /* clean up */
2078   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
2079   if (size > 1) PetscCall(ISDestroy(&lcolp));
2080   PetscFunctionReturn(PETSC_SUCCESS);
2081 }
2082 
2083 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
2084 {
2085   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
2086   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
2087 
2088   PetscFunctionBegin;
2089   if (nghosts) *nghosts = B->nbs;
2090   if (ghosts) *ghosts = baij->garray;
2091   PetscFunctionReturn(PETSC_SUCCESS);
2092 }
2093 
2094 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
2095 {
2096   Mat          B;
2097   Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
2098   Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
2099   Mat_SeqAIJ  *b;
2100   PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
2101   PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
2102   PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;
2103 
2104   PetscFunctionBegin;
2105   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2106   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2107 
2108   /*   Tell every processor the number of nonzeros per row  */
2109   PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
2110   for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
2111   PetscCall(PetscMalloc1(2 * size, &recvcounts));
2112   displs = recvcounts + size;
2113   for (i = 0; i < size; i++) {
2114     PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i]));
2115     PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i]));
2116   }
2117   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2118   /* Create the sequential matrix of the same type as the local block diagonal  */
2119   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
2120   PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
2121   PetscCall(MatSetType(B, MATSEQAIJ));
2122   PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
2123   b = (Mat_SeqAIJ *)B->data;
2124 
2125   /*     Copy my part of matrix column indices over  */
2126   sendcount  = ad->nz + bd->nz;
2127   jsendbuf   = b->j + b->i[rstarts[rank] / bs];
2128   a_jsendbuf = ad->j;
2129   b_jsendbuf = bd->j;
2130   n          = A->rmap->rend / bs - A->rmap->rstart / bs;
2131   cnt        = 0;
2132   for (i = 0; i < n; i++) {
2133     /* put in lower diagonal portion */
2134     m = bd->i[i + 1] - bd->i[i];
2135     while (m > 0) {
2136       /* is it above diagonal (in bd (compressed) numbering) */
2137       if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
2138       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2139       m--;
2140     }
2141 
2142     /* put in diagonal portion */
2143     for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;
2144 
2145     /* put in upper diagonal portion */
2146     while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
2147   }
2148   PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);
2149 
2150   /*  Gather all column indices to all processors  */
2151   for (i = 0; i < size; i++) {
2152     recvcounts[i] = 0;
2153     for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
2154   }
2155   displs[0] = 0;
2156   for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
2157   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2158   /*  Assemble the matrix into usable form (note numerical values not yet set)  */
2159   /* set the b->ilen (length of each row) values */
2160   PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
2161   /* set the b->i indices */
2162   b->i[0] = 0;
2163   for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
2164   PetscCall(PetscFree(lens));
2165   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2166   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2167   PetscCall(PetscFree(recvcounts));
2168 
2169   PetscCall(MatPropagateSymmetryOptions(A, B));
2170   *newmat = B;
2171   PetscFunctionReturn(PETSC_SUCCESS);
2172 }
2173 
2174 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2175 {
2176   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
2177   Vec          bb1 = NULL;
2178 
2179   PetscFunctionBegin;
2180   if (flag == SOR_APPLY_UPPER) {
2181     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2182     PetscFunctionReturn(PETSC_SUCCESS);
2183   }
2184 
2185   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));
2186 
2187   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2188     if (flag & SOR_ZERO_INITIAL_GUESS) {
2189       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2190       its--;
2191     }
2192 
2193     while (its--) {
2194       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2195       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2196 
2197       /* update rhs: bb1 = bb - B*x */
2198       PetscCall(VecScale(mat->lvec, -1.0));
2199       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2200 
2201       /* local sweep */
2202       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
2203     }
2204   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2205     if (flag & SOR_ZERO_INITIAL_GUESS) {
2206       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2207       its--;
2208     }
2209     while (its--) {
2210       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2211       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2212 
2213       /* update rhs: bb1 = bb - B*x */
2214       PetscCall(VecScale(mat->lvec, -1.0));
2215       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2216 
2217       /* local sweep */
2218       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
2219     }
2220   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2221     if (flag & SOR_ZERO_INITIAL_GUESS) {
2222       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2223       its--;
2224     }
2225     while (its--) {
2226       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2227       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2228 
2229       /* update rhs: bb1 = bb - B*x */
2230       PetscCall(VecScale(mat->lvec, -1.0));
2231       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2232 
2233       /* local sweep */
2234       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
2235     }
2236   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");
2237 
2238   PetscCall(VecDestroy(&bb1));
2239   PetscFunctionReturn(PETSC_SUCCESS);
2240 }
2241 
2242 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
2243 {
2244   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2245   PetscInt     m, N, i, *garray = aij->garray;
2246   PetscInt     ib, jb, bs = A->rmap->bs;
2247   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
2248   MatScalar   *a_val = a_aij->a;
2249   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
2250   MatScalar   *b_val = b_aij->a;
2251   PetscReal   *work;
2252 
2253   PetscFunctionBegin;
2254   PetscCall(MatGetSize(A, &m, &N));
2255   PetscCall(PetscCalloc1(N, &work));
2256   if (type == NORM_2) {
2257     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2258       for (jb = 0; jb < bs; jb++) {
2259         for (ib = 0; ib < bs; ib++) {
2260           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2261           a_val++;
2262         }
2263       }
2264     }
2265     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2266       for (jb = 0; jb < bs; jb++) {
2267         for (ib = 0; ib < bs; ib++) {
2268           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2269           b_val++;
2270         }
2271       }
2272     }
2273   } else if (type == NORM_1) {
2274     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2275       for (jb = 0; jb < bs; jb++) {
2276         for (ib = 0; ib < bs; ib++) {
2277           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2278           a_val++;
2279         }
2280       }
2281     }
2282     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2283       for (jb = 0; jb < bs; jb++) {
2284         for (ib = 0; ib < bs; ib++) {
2285           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2286           b_val++;
2287         }
2288       }
2289     }
2290   } else if (type == NORM_INFINITY) {
2291     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2292       for (jb = 0; jb < bs; jb++) {
2293         for (ib = 0; ib < bs; ib++) {
2294           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2295           work[col]    = PetscMax(PetscAbsScalar(*a_val), work[col]);
2296           a_val++;
2297         }
2298       }
2299     }
2300     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2301       for (jb = 0; jb < bs; jb++) {
2302         for (ib = 0; ib < bs; ib++) {
2303           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2304           work[col]    = PetscMax(PetscAbsScalar(*b_val), work[col]);
2305           b_val++;
2306         }
2307       }
2308     }
2309   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2310     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2311       for (jb = 0; jb < bs; jb++) {
2312         for (ib = 0; ib < bs; ib++) {
2313           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2314           a_val++;
2315         }
2316       }
2317     }
2318     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2319       for (jb = 0; jb < bs; jb++) {
2320         for (ib = 0; ib < bs; ib++) {
2321           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2322           b_val++;
2323         }
2324       }
2325     }
2326   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2327     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2328       for (jb = 0; jb < bs; jb++) {
2329         for (ib = 0; ib < bs; ib++) {
2330           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2331           a_val++;
2332         }
2333       }
2334     }
2335     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2336       for (jb = 0; jb < bs; jb++) {
2337         for (ib = 0; ib < bs; ib++) {
2338           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2339           b_val++;
2340         }
2341       }
2342     }
2343   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2344   if (type == NORM_INFINITY) {
2345     PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2346   } else {
2347     PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2348   }
2349   PetscCall(PetscFree(work));
2350   if (type == NORM_2) {
2351     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2352   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2353     for (i = 0; i < N; i++) reductions[i] /= m;
2354   }
2355   PetscFunctionReturn(PETSC_SUCCESS);
2356 }
2357 
2358 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2359 {
2360   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2361 
2362   PetscFunctionBegin;
2363   PetscCall(MatInvertBlockDiagonal(a->A, values));
2364   A->factorerrortype             = a->A->factorerrortype;
2365   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2366   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2367   PetscFunctionReturn(PETSC_SUCCESS);
2368 }
2369 
2370 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2371 {
2372   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2373   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
2374 
2375   PetscFunctionBegin;
2376   if (!Y->preallocated) {
2377     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
2378   } else if (!aij->nz) {
2379     PetscInt nonew = aij->nonew;
2380     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2381     aij->nonew = nonew;
2382   }
2383   PetscCall(MatShift_Basic(Y, a));
2384   PetscFunctionReturn(PETSC_SUCCESS);
2385 }
2386 
2387 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
2388 {
2389   PetscFunctionBegin;
2390   *a = ((Mat_MPIBAIJ *)A->data)->A;
2391   PetscFunctionReturn(PETSC_SUCCESS);
2392 }
2393 
2394 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
2395 {
2396   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2397 
2398   PetscFunctionBegin;
2399   PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2400   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2401   PetscFunctionReturn(PETSC_SUCCESS);
2402 }
2403 
2404 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2405                                        MatGetRow_MPIBAIJ,
2406                                        MatRestoreRow_MPIBAIJ,
2407                                        MatMult_MPIBAIJ,
2408                                        /* 4*/ MatMultAdd_MPIBAIJ,
2409                                        MatMultTranspose_MPIBAIJ,
2410                                        MatMultTransposeAdd_MPIBAIJ,
2411                                        NULL,
2412                                        NULL,
2413                                        NULL,
2414                                        /*10*/ NULL,
2415                                        NULL,
2416                                        NULL,
2417                                        MatSOR_MPIBAIJ,
2418                                        MatTranspose_MPIBAIJ,
2419                                        /*15*/ MatGetInfo_MPIBAIJ,
2420                                        MatEqual_MPIBAIJ,
2421                                        MatGetDiagonal_MPIBAIJ,
2422                                        MatDiagonalScale_MPIBAIJ,
2423                                        MatNorm_MPIBAIJ,
2424                                        /*20*/ MatAssemblyBegin_MPIBAIJ,
2425                                        MatAssemblyEnd_MPIBAIJ,
2426                                        MatSetOption_MPIBAIJ,
2427                                        MatZeroEntries_MPIBAIJ,
2428                                        /*24*/ MatZeroRows_MPIBAIJ,
2429                                        NULL,
2430                                        NULL,
2431                                        NULL,
2432                                        NULL,
2433                                        /*29*/ MatSetUp_MPI_Hash,
2434                                        NULL,
2435                                        NULL,
2436                                        MatGetDiagonalBlock_MPIBAIJ,
2437                                        NULL,
2438                                        /*34*/ MatDuplicate_MPIBAIJ,
2439                                        NULL,
2440                                        NULL,
2441                                        NULL,
2442                                        NULL,
2443                                        /*39*/ MatAXPY_MPIBAIJ,
2444                                        MatCreateSubMatrices_MPIBAIJ,
2445                                        MatIncreaseOverlap_MPIBAIJ,
2446                                        MatGetValues_MPIBAIJ,
2447                                        MatCopy_MPIBAIJ,
2448                                        /*44*/ NULL,
2449                                        MatScale_MPIBAIJ,
2450                                        MatShift_MPIBAIJ,
2451                                        NULL,
2452                                        MatZeroRowsColumns_MPIBAIJ,
2453                                        /*49*/ NULL,
2454                                        NULL,
2455                                        NULL,
2456                                        NULL,
2457                                        NULL,
2458                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2459                                        NULL,
2460                                        MatSetUnfactored_MPIBAIJ,
2461                                        MatPermute_MPIBAIJ,
2462                                        MatSetValuesBlocked_MPIBAIJ,
2463                                        /*59*/ MatCreateSubMatrix_MPIBAIJ,
2464                                        MatDestroy_MPIBAIJ,
2465                                        MatView_MPIBAIJ,
2466                                        NULL,
2467                                        NULL,
2468                                        /*64*/ NULL,
2469                                        NULL,
2470                                        NULL,
2471                                        NULL,
2472                                        MatGetRowMaxAbs_MPIBAIJ,
2473                                        /*69*/ NULL,
2474                                        NULL,
2475                                        NULL,
2476                                        MatFDColoringApply_BAIJ,
2477                                        NULL,
2478                                        /*74*/ NULL,
2479                                        NULL,
2480                                        NULL,
2481                                        NULL,
2482                                        MatLoad_MPIBAIJ,
2483                                        /*79*/ NULL,
2484                                        NULL,
2485                                        NULL,
2486                                        NULL,
2487                                        NULL,
2488                                        /*84*/ NULL,
2489                                        NULL,
2490                                        NULL,
2491                                        NULL,
2492                                        NULL,
2493                                        /*89*/ NULL,
2494                                        NULL,
2495                                        NULL,
2496                                        NULL,
2497                                        MatConjugate_MPIBAIJ,
2498                                        /*94*/ NULL,
2499                                        NULL,
2500                                        MatRealPart_MPIBAIJ,
2501                                        MatImaginaryPart_MPIBAIJ,
2502                                        NULL,
2503                                        /*99*/ NULL,
2504                                        NULL,
2505                                        NULL,
2506                                        NULL,
2507                                        NULL,
2508                                        /*104*/ MatGetSeqNonzeroStructure_MPIBAIJ,
2509                                        NULL,
2510                                        MatGetGhosts_MPIBAIJ,
2511                                        NULL,
2512                                        NULL,
2513                                        /*109*/ NULL,
2514                                        NULL,
2515                                        NULL,
2516                                        NULL,
2517                                        MatGetMultiProcBlock_MPIBAIJ,
2518                                        /*114*/ NULL,
2519                                        MatGetColumnReductions_MPIBAIJ,
2520                                        MatInvertBlockDiagonal_MPIBAIJ,
2521                                        NULL,
2522                                        NULL,
2523                                        /*119*/ NULL,
2524                                        NULL,
2525                                        NULL,
2526                                        NULL,
2527                                        NULL,
2528                                        /*124*/ NULL,
2529                                        NULL,
2530                                        MatSetBlockSizes_Default,
2531                                        NULL,
2532                                        MatFDColoringSetUp_MPIXAIJ,
2533                                        /*129*/ NULL,
2534                                        MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2535                                        NULL,
2536                                        NULL,
2537                                        NULL,
2538                                        /*134*/ NULL,
2539                                        NULL,
2540                                        MatEliminateZeros_MPIBAIJ,
2541                                        MatGetRowSumAbs_MPIBAIJ,
2542                                        NULL,
2543                                        /*139*/ NULL,
2544                                        NULL,
2545                                        MatCopyHashToXAIJ_MPI_Hash,
2546                                        NULL,
2547                                        NULL};
2548 
2549 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2550 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2551 
2552 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2553 {
2554   PetscInt        m, rstart, cstart, cend;
2555   PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2556   const PetscInt *JJ          = NULL;
2557   PetscScalar    *values      = NULL;
2558   PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
2559   PetscBool       nooffprocentries;
2560 
2561   PetscFunctionBegin;
2562   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
2563   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
2564   PetscCall(PetscLayoutSetUp(B->rmap));
2565   PetscCall(PetscLayoutSetUp(B->cmap));
2566   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2567   m      = B->rmap->n / bs;
2568   rstart = B->rmap->rstart / bs;
2569   cstart = B->cmap->rstart / bs;
2570   cend   = B->cmap->rend / bs;
2571 
2572   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2573   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2574   for (i = 0; i < m; i++) {
2575     nz = ii[i + 1] - ii[i];
2576     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2577     nz_max = PetscMax(nz_max, nz);
2578     dlen   = 0;
2579     olen   = 0;
2580     JJ     = jj + ii[i];
2581     for (j = 0; j < nz; j++) {
2582       if (*JJ < cstart || *JJ >= cend) olen++;
2583       else dlen++;
2584       JJ++;
2585     }
2586     d_nnz[i] = dlen;
2587     o_nnz[i] = olen;
2588   }
2589   PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2590   PetscCall(PetscFree2(d_nnz, o_nnz));
2591 
2592   values = (PetscScalar *)V;
2593   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2594   for (i = 0; i < m; i++) {
2595     PetscInt        row   = i + rstart;
2596     PetscInt        ncols = ii[i + 1] - ii[i];
2597     const PetscInt *icols = jj + ii[i];
2598     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2599       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2600       PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2601     } else { /* block ordering does not match so we can only insert one block at a time. */
2602       PetscInt j;
2603       for (j = 0; j < ncols; j++) {
2604         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2605         PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2606       }
2607     }
2608   }
2609 
2610   if (!V) PetscCall(PetscFree(values));
2611   nooffprocentries    = B->nooffprocentries;
2612   B->nooffprocentries = PETSC_TRUE;
2613   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2614   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2615   B->nooffprocentries = nooffprocentries;
2616 
2617   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2618   PetscFunctionReturn(PETSC_SUCCESS);
2619 }
2620 
2621 /*@C
2622   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2623 
2624   Collective
2625 
2626   Input Parameters:
2627 + B  - the matrix
2628 . bs - the block size
2629 . i  - the indices into `j` for the start of each local row (starts with zero)
2630 . j  - the column indices for each local row (starts with zero) these must be sorted for each row
2631 - v  - optional values in the matrix, use `NULL` if not provided
2632 
2633   Level: advanced
2634 
2635   Notes:
2636   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
2637   thus you CANNOT change the matrix entries by changing the values of `v` after you have
2638   called this routine.
2639 
2640   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
2641   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2642   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2643   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2644   block column and the second index is over columns within a block.
2645 
2646   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2647 
2648 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
2649 @*/
2650 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2651 {
2652   PetscFunctionBegin;
2653   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2654   PetscValidType(B, 1);
2655   PetscValidLogicalCollectiveInt(B, bs, 2);
2656   PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2657   PetscFunctionReturn(PETSC_SUCCESS);
2658 }
2659 
2660 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
2661 {
2662   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
2663   PetscInt     i;
2664   PetscMPIInt  size;
2665 
2666   PetscFunctionBegin;
2667   if (B->hash_active) {
2668     B->ops[0]      = b->cops;
2669     B->hash_active = PETSC_FALSE;
2670   }
2671   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2672   PetscCall(MatSetBlockSize(B, bs));
2673   PetscCall(PetscLayoutSetUp(B->rmap));
2674   PetscCall(PetscLayoutSetUp(B->cmap));
2675   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2676 
2677   if (d_nnz) {
2678     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2679   }
2680   if (o_nnz) {
2681     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2682   }
2683 
2684   b->bs2 = bs * bs;
2685   b->mbs = B->rmap->n / bs;
2686   b->nbs = B->cmap->n / bs;
2687   b->Mbs = B->rmap->N / bs;
2688   b->Nbs = B->cmap->N / bs;
2689 
2690   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2691   b->rstartbs = B->rmap->rstart / bs;
2692   b->rendbs   = B->rmap->rend / bs;
2693   b->cstartbs = B->cmap->rstart / bs;
2694   b->cendbs   = B->cmap->rend / bs;
2695 
2696 #if defined(PETSC_USE_CTABLE)
2697   PetscCall(PetscHMapIDestroy(&b->colmap));
2698 #else
2699   PetscCall(PetscFree(b->colmap));
2700 #endif
2701   PetscCall(PetscFree(b->garray));
2702   PetscCall(VecDestroy(&b->lvec));
2703   PetscCall(VecScatterDestroy(&b->Mvctx));
2704 
2705   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2706 
2707   MatSeqXAIJGetOptions_Private(b->B);
2708   PetscCall(MatDestroy(&b->B));
2709   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2710   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2711   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2712   MatSeqXAIJRestoreOptions_Private(b->B);
2713 
2714   MatSeqXAIJGetOptions_Private(b->A);
2715   PetscCall(MatDestroy(&b->A));
2716   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2717   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2718   PetscCall(MatSetType(b->A, MATSEQBAIJ));
2719   MatSeqXAIJRestoreOptions_Private(b->A);
2720 
2721   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
2722   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2723   B->preallocated  = PETSC_TRUE;
2724   B->was_assembled = PETSC_FALSE;
2725   B->assembled     = PETSC_FALSE;
2726   PetscFunctionReturn(PETSC_SUCCESS);
2727 }
2728 
2729 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
2730 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
2731 
2732 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
2733 {
2734   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
2735   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
2736   PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
2737   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2738 
2739   PetscFunctionBegin;
2740   PetscCall(PetscMalloc1(M + 1, &ii));
2741   ii[0] = 0;
2742   for (i = 0; i < M; i++) {
2743     PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
2744     PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
2745     ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
2746     /* remove one from count of matrix has diagonal */
2747     for (j = id[i]; j < id[i + 1]; j++) {
2748       if (jd[j] == i) {
2749         ii[i + 1]--;
2750         break;
2751       }
2752     }
2753   }
2754   PetscCall(PetscMalloc1(ii[M], &jj));
2755   cnt = 0;
2756   for (i = 0; i < M; i++) {
2757     for (j = io[i]; j < io[i + 1]; j++) {
2758       if (garray[jo[j]] > rstart) break;
2759       jj[cnt++] = garray[jo[j]];
2760     }
2761     for (k = id[i]; k < id[i + 1]; k++) {
2762       if (jd[k] != i) jj[cnt++] = rstart + jd[k];
2763     }
2764     for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
2765   }
2766   PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
2767   PetscFunctionReturn(PETSC_SUCCESS);
2768 }
2769 
2770 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2771 
2772 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2773 
2774 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
2775 {
2776   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2777   Mat_MPIAIJ  *b;
2778   Mat          B;
2779 
2780   PetscFunctionBegin;
2781   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");
2782 
2783   if (reuse == MAT_REUSE_MATRIX) {
2784     B = *newmat;
2785   } else {
2786     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
2787     PetscCall(MatSetType(B, MATMPIAIJ));
2788     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
2789     PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
2790     PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
2791     PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
2792   }
2793   b = (Mat_MPIAIJ *)B->data;
2794 
2795   if (reuse == MAT_REUSE_MATRIX) {
2796     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
2797     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
2798   } else {
2799     PetscInt   *garray = a->garray;
2800     Mat_SeqAIJ *bB;
2801     PetscInt    bs, nnz;
2802     PetscCall(MatDestroy(&b->A));
2803     PetscCall(MatDestroy(&b->B));
2804     /* just clear out the data structure */
2805     PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_FALSE));
2806     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
2807     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
2808 
2809     /* Global numbering for b->B columns */
2810     bB  = (Mat_SeqAIJ *)b->B->data;
2811     bs  = A->rmap->bs;
2812     nnz = bB->i[A->rmap->n];
2813     for (PetscInt k = 0; k < nnz; k++) {
2814       PetscInt bj = bB->j[k] / bs;
2815       PetscInt br = bB->j[k] % bs;
2816       bB->j[k]    = garray[bj] * bs + br;
2817     }
2818   }
2819   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2820   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2821 
2822   if (reuse == MAT_INPLACE_MATRIX) {
2823     PetscCall(MatHeaderReplace(A, &B));
2824   } else {
2825     *newmat = B;
2826   }
2827   PetscFunctionReturn(PETSC_SUCCESS);
2828 }
2829 
2830 /*MC
2831    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2832 
2833    Options Database Keys:
2834 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
2835 . -mat_block_size <bs> - set the blocksize used to store the matrix
2836 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
2837 - -mat_use_hash_table <fact> - set hash table factor
2838 
2839    Level: beginner
2840 
2841    Note:
2842     `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this no
2843     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2844 
2845 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
2846 M*/
2847 
2848 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2849 
2850 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2851 {
2852   Mat_MPIBAIJ *b;
2853   PetscBool    flg = PETSC_FALSE;
2854 
2855   PetscFunctionBegin;
2856   PetscCall(PetscNew(&b));
2857   B->data      = (void *)b;
2858   B->ops[0]    = MatOps_Values;
2859   B->assembled = PETSC_FALSE;
2860 
2861   B->insertmode = NOT_SET_VALUES;
2862   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2863   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2864 
2865   /* build local table of row and column ownerships */
2866   PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2867 
2868   /* build cache for off array entries formed */
2869   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2870 
2871   b->donotstash  = PETSC_FALSE;
2872   b->colmap      = NULL;
2873   b->garray      = NULL;
2874   b->roworiented = PETSC_TRUE;
2875 
2876   /* stuff used in block assembly */
2877   b->barray = NULL;
2878 
2879   /* stuff used for matrix vector multiply */
2880   b->lvec  = NULL;
2881   b->Mvctx = NULL;
2882 
2883   /* stuff for MatGetRow() */
2884   b->rowindices   = NULL;
2885   b->rowvalues    = NULL;
2886   b->getrowactive = PETSC_FALSE;
2887 
2888   /* hash table stuff */
2889   b->ht           = NULL;
2890   b->hd           = NULL;
2891   b->ht_size      = 0;
2892   b->ht_flag      = PETSC_FALSE;
2893   b->ht_fact      = 0;
2894   b->ht_total_ct  = 0;
2895   b->ht_insert_ct = 0;
2896 
2897   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2898   b->ijonly = PETSC_FALSE;
2899 
2900   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2901   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2902   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2903 #if defined(PETSC_HAVE_HYPRE)
2904   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2905 #endif
2906   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2907   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2908   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2909   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2910   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2911   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2912   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2913   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
2914 
2915   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
2916   PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2917   if (flg) {
2918     PetscReal fact = 1.39;
2919     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2920     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2921     if (fact <= 1.0) fact = 1.39;
2922     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2923     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2924   }
2925   PetscOptionsEnd();
2926   PetscFunctionReturn(PETSC_SUCCESS);
2927 }
2928 
2929 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2930 /*MC
2931    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2932 
2933    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
2934    and `MATMPIBAIJ` otherwise.
2935 
2936    Options Database Keys:
2937 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2938 
2939   Level: beginner
2940 
2941 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2942 M*/
2943 
2944 /*@
2945   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2946   (block compressed row).
2947 
2948   Collective
2949 
2950   Input Parameters:
2951 + B     - the matrix
2952 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2953           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2954 . d_nz  - number of block nonzeros per block row in diagonal portion of local
2955            submatrix  (same for all local rows)
2956 . d_nnz - array containing the number of block nonzeros in the various block rows
2957            of the in diagonal portion of the local (possibly different for each block
2958            row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry and
2959            set it even if it is zero.
2960 . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2961            submatrix (same for all local rows).
2962 - o_nnz - array containing the number of nonzeros in the various block rows of the
2963            off-diagonal portion of the local submatrix (possibly different for
2964            each block row) or `NULL`.
2965 
2966    If the *_nnz parameter is given then the *_nz parameter is ignored
2967 
2968   Options Database Keys:
2969 + -mat_block_size            - size of the blocks to use
2970 - -mat_use_hash_table <fact> - set hash table factor
2971 
2972   Level: intermediate
2973 
2974   Notes:
2975   For good matrix assembly performance
2976   the user should preallocate the matrix storage by setting the parameters
2977   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
2978   performance can be increased by more than a factor of 50.
2979 
2980   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
2981   than it must be used on all processors that share the object for that argument.
2982 
2983   Storage Information:
2984   For a square global matrix we define each processor's diagonal portion
2985   to be its local rows and the corresponding columns (a square submatrix);
2986   each processor's off-diagonal portion encompasses the remainder of the
2987   local matrix (a rectangular submatrix).
2988 
2989   The user can specify preallocated storage for the diagonal part of
2990   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
2991   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2992   memory allocation.  Likewise, specify preallocated storage for the
2993   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2994 
2995   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2996   the figure below we depict these three local rows and all columns (0-11).
2997 
2998 .vb
2999            0 1 2 3 4 5 6 7 8 9 10 11
3000           --------------------------
3001    row 3  |o o o d d d o o o o  o  o
3002    row 4  |o o o d d d o o o o  o  o
3003    row 5  |o o o d d d o o o o  o  o
3004           --------------------------
3005 .ve
3006 
3007   Thus, any entries in the d locations are stored in the d (diagonal)
3008   submatrix, and any entries in the o locations are stored in the
3009   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3010   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3011 
3012   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3013   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3014   In general, for PDE problems in which most nonzeros are near the diagonal,
3015   one expects `d_nz` >> `o_nz`.
3016 
3017   You can call `MatGetInfo()` to get information on how effective the preallocation was;
3018   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3019   You can also run with the option `-info` and look for messages with the string
3020   malloc in them to see if additional memory allocation was needed.
3021 
3022 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
3023 @*/
3024 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
3025 {
3026   PetscFunctionBegin;
3027   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
3028   PetscValidType(B, 1);
3029   PetscValidLogicalCollectiveInt(B, bs, 2);
3030   PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
3031   PetscFunctionReturn(PETSC_SUCCESS);
3032 }
3033 
3034 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
3035 /*@
3036   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
3037   (block compressed row).
3038 
3039   Collective
3040 
3041   Input Parameters:
3042 + comm  - MPI communicator
3043 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3044           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3045 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
3046           This value should be the same as the local size used in creating the
3047           y vector for the matrix-vector product y = Ax.
3048 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
3049           This value should be the same as the local size used in creating the
3050           x vector for the matrix-vector product y = Ax.
3051 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
3052 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3053 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
3054           submatrix  (same for all local rows)
3055 . d_nnz - array containing the number of nonzero blocks in the various block rows
3056           of the in diagonal portion of the local (possibly different for each block
3057           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3058           and set it even if it is zero.
3059 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3060           submatrix (same for all local rows).
3061 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3062           off-diagonal portion of the local submatrix (possibly different for
3063           each block row) or NULL.
3064 
3065   Output Parameter:
3066 . A - the matrix
3067 
3068   Options Database Keys:
3069 + -mat_block_size            - size of the blocks to use
3070 - -mat_use_hash_table <fact> - set hash table factor
3071 
3072   Level: intermediate
3073 
3074   Notes:
3075   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3076   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3077   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3078 
3079   For good matrix assembly performance
3080   the user should preallocate the matrix storage by setting the parameters
3081   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
3082   performance can be increased by more than a factor of 50.
3083 
3084   If the *_nnz parameter is given then the *_nz parameter is ignored
3085 
3086   A nonzero block is any block that as 1 or more nonzeros in it
3087 
3088   The user MUST specify either the local or global matrix dimensions
3089   (possibly both).
3090 
3091   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
3092   than it must be used on all processors that share the object for that argument.
3093 
3094   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
3095   `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
3096 
3097   Storage Information:
3098   For a square global matrix we define each processor's diagonal portion
3099   to be its local rows and the corresponding columns (a square submatrix);
3100   each processor's off-diagonal portion encompasses the remainder of the
3101   local matrix (a rectangular submatrix).
3102 
3103   The user can specify preallocated storage for the diagonal part of
3104   the local submatrix with either d_nz or d_nnz (not both).  Set
3105   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3106   memory allocation.  Likewise, specify preallocated storage for the
3107   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3108 
3109   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3110   the figure below we depict these three local rows and all columns (0-11).
3111 
3112 .vb
3113            0 1 2 3 4 5 6 7 8 9 10 11
3114           --------------------------
3115    row 3  |o o o d d d o o o o  o  o
3116    row 4  |o o o d d d o o o o  o  o
3117    row 5  |o o o d d d o o o o  o  o
3118           --------------------------
3119 .ve
3120 
3121   Thus, any entries in the d locations are stored in the d (diagonal)
3122   submatrix, and any entries in the o locations are stored in the
3123   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3124   stored simply in the `MATSEQBAIJ` format for compressed row storage.
3125 
3126   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
3127   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
3128   In general, for PDE problems in which most nonzeros are near the diagonal,
3129   one expects `d_nz` >> `o_nz`.
3130 
3131 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`,
3132           `MatGetOwnershipRange()`,  `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
3133 @*/
3134 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
3135 {
3136   PetscMPIInt size;
3137 
3138   PetscFunctionBegin;
3139   PetscCall(MatCreate(comm, A));
3140   PetscCall(MatSetSizes(*A, m, n, M, N));
3141   PetscCallMPI(MPI_Comm_size(comm, &size));
3142   if (size > 1) {
3143     PetscCall(MatSetType(*A, MATMPIBAIJ));
3144     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3145   } else {
3146     PetscCall(MatSetType(*A, MATSEQBAIJ));
3147     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
3148   }
3149   PetscFunctionReturn(PETSC_SUCCESS);
3150 }
3151 
3152 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
3153 {
3154   Mat          mat;
3155   Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
3156   PetscInt     len = 0;
3157 
3158   PetscFunctionBegin;
3159   *newmat = NULL;
3160   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
3161   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
3162   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
3163 
3164   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3165   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3166   if (matin->hash_active) {
3167     PetscCall(MatSetUp(mat));
3168   } else {
3169     mat->factortype   = matin->factortype;
3170     mat->preallocated = PETSC_TRUE;
3171     mat->assembled    = PETSC_TRUE;
3172     mat->insertmode   = NOT_SET_VALUES;
3173 
3174     a             = (Mat_MPIBAIJ *)mat->data;
3175     mat->rmap->bs = matin->rmap->bs;
3176     a->bs2        = oldmat->bs2;
3177     a->mbs        = oldmat->mbs;
3178     a->nbs        = oldmat->nbs;
3179     a->Mbs        = oldmat->Mbs;
3180     a->Nbs        = oldmat->Nbs;
3181 
3182     a->size         = oldmat->size;
3183     a->rank         = oldmat->rank;
3184     a->donotstash   = oldmat->donotstash;
3185     a->roworiented  = oldmat->roworiented;
3186     a->rowindices   = NULL;
3187     a->rowvalues    = NULL;
3188     a->getrowactive = PETSC_FALSE;
3189     a->barray       = NULL;
3190     a->rstartbs     = oldmat->rstartbs;
3191     a->rendbs       = oldmat->rendbs;
3192     a->cstartbs     = oldmat->cstartbs;
3193     a->cendbs       = oldmat->cendbs;
3194 
3195     /* hash table stuff */
3196     a->ht           = NULL;
3197     a->hd           = NULL;
3198     a->ht_size      = 0;
3199     a->ht_flag      = oldmat->ht_flag;
3200     a->ht_fact      = oldmat->ht_fact;
3201     a->ht_total_ct  = 0;
3202     a->ht_insert_ct = 0;
3203 
3204     PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
3205     if (oldmat->colmap) {
3206 #if defined(PETSC_USE_CTABLE)
3207       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3208 #else
3209       PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
3210       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
3211 #endif
3212     } else a->colmap = NULL;
3213 
3214     if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
3215       PetscCall(PetscMalloc1(len, &a->garray));
3216       PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3217     } else a->garray = NULL;
3218 
3219     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
3220     PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3221     PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3222 
3223     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3224     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3225   }
3226   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3227   *newmat = mat;
3228   PetscFunctionReturn(PETSC_SUCCESS);
3229 }
3230 
3231 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3232 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
3233 {
3234   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3235   PetscInt    *rowidxs, *colidxs, rs, cs, ce;
3236   PetscScalar *matvals;
3237 
3238   PetscFunctionBegin;
3239   PetscCall(PetscViewerSetUp(viewer));
3240 
3241   /* read in matrix header */
3242   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3243   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3244   M  = header[1];
3245   N  = header[2];
3246   nz = header[3];
3247   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3248   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3249   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");
3250 
3251   /* set block sizes from the viewer's .info file */
3252   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3253   /* set local sizes if not set already */
3254   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3255   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3256   /* set global sizes if not set already */
3257   if (mat->rmap->N < 0) mat->rmap->N = M;
3258   if (mat->cmap->N < 0) mat->cmap->N = N;
3259   PetscCall(PetscLayoutSetUp(mat->rmap));
3260   PetscCall(PetscLayoutSetUp(mat->cmap));
3261 
3262   /* check if the matrix sizes are correct */
3263   PetscCall(MatGetSize(mat, &rows, &cols));
3264   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3265   PetscCall(MatGetBlockSize(mat, &bs));
3266   PetscCall(MatGetLocalSize(mat, &m, &n));
3267   PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
3268   PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
3269   mbs = m / bs;
3270   nbs = n / bs;
3271 
3272   /* read in row lengths and build row indices */
3273   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3274   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3275   rowidxs[0] = 0;
3276   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3277   PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3278   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3279 
3280   /* read in column indices and matrix values */
3281   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3282   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3283   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3284 
3285   {                /* preallocate matrix storage */
3286     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3287     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3288     PetscBool  sbaij, done;
3289     PetscInt  *d_nnz, *o_nnz;
3290 
3291     PetscCall(PetscBTCreate(nbs, &bt));
3292     PetscCall(PetscHSetICreate(&ht));
3293     PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
3294     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
3295     for (i = 0; i < mbs; i++) {
3296       PetscCall(PetscBTMemzero(nbs, bt));
3297       PetscCall(PetscHSetIClear(ht));
3298       for (k = 0; k < bs; k++) {
3299         PetscInt row = bs * i + k;
3300         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3301           PetscInt col = colidxs[j];
3302           if (!sbaij || col >= row) {
3303             if (col >= cs && col < ce) {
3304               if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
3305             } else {
3306               PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
3307               if (done) o_nnz[i]++;
3308             }
3309           }
3310         }
3311       }
3312     }
3313     PetscCall(PetscBTDestroy(&bt));
3314     PetscCall(PetscHSetIDestroy(&ht));
3315     PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3316     PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
3317     PetscCall(PetscFree2(d_nnz, o_nnz));
3318   }
3319 
3320   /* store matrix values */
3321   for (i = 0; i < m; i++) {
3322     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
3323     PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3324   }
3325 
3326   PetscCall(PetscFree(rowidxs));
3327   PetscCall(PetscFree2(colidxs, matvals));
3328   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3329   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3330   PetscFunctionReturn(PETSC_SUCCESS);
3331 }
3332 
3333 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
3334 {
3335   PetscBool isbinary;
3336 
3337   PetscFunctionBegin;
3338   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3339   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3340   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
3341   PetscFunctionReturn(PETSC_SUCCESS);
3342 }
3343 
3344 /*@
3345   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table
3346 
3347   Input Parameters:
3348 + mat  - the matrix
3349 - fact - factor
3350 
3351   Options Database Key:
3352 . -mat_use_hash_table <fact> - provide the factor
3353 
3354   Level: advanced
3355 
3356 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3357 @*/
3358 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3359 {
3360   PetscFunctionBegin;
3361   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3362   PetscFunctionReturn(PETSC_SUCCESS);
3363 }
3364 
3365 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3366 {
3367   Mat_MPIBAIJ *baij;
3368 
3369   PetscFunctionBegin;
3370   baij          = (Mat_MPIBAIJ *)mat->data;
3371   baij->ht_fact = fact;
3372   PetscFunctionReturn(PETSC_SUCCESS);
3373 }
3374 
3375 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3376 {
3377   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3378   PetscBool    flg;
3379 
3380   PetscFunctionBegin;
3381   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3382   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3383   if (Ad) *Ad = a->A;
3384   if (Ao) *Ao = a->B;
3385   if (colmap) *colmap = a->garray;
3386   PetscFunctionReturn(PETSC_SUCCESS);
3387 }
3388 
3389 /*
3390     Special version for direct calls from Fortran (to eliminate two function call overheads
3391 */
3392 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3393   #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3394 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3395   #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3396 #endif
3397 
3398 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3399 /*@C
3400   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
3401 
3402   Collective
3403 
3404   Input Parameters:
3405 + matin  - the matrix
3406 . min    - number of input rows
3407 . im     - input rows
3408 . nin    - number of input columns
3409 . in     - input columns
3410 . v      - numerical values input
3411 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3412 
3413   Level: advanced
3414 
3415   Developer Notes:
3416   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3417 
3418 .seealso: `Mat`, `MatSetValuesBlocked()`
3419 @*/
3420 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
3421 {
3422   /* convert input arguments to C version */
3423   Mat        mat = *matin;
3424   PetscInt   m = *min, n = *nin;
3425   InsertMode addv = *addvin;
3426 
3427   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
3428   const MatScalar *value;
3429   MatScalar       *barray      = baij->barray;
3430   PetscBool        roworiented = baij->roworiented;
3431   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
3432   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
3433   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
3434 
3435   PetscFunctionBegin;
3436   /* tasks normally handled by MatSetValuesBlocked() */
3437   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3438   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
3439   PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
3440   if (mat->assembled) {
3441     mat->was_assembled = PETSC_TRUE;
3442     mat->assembled     = PETSC_FALSE;
3443   }
3444   PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));
3445 
3446   if (!barray) {
3447     PetscCall(PetscMalloc1(bs2, &barray));
3448     baij->barray = barray;
3449   }
3450 
3451   if (roworiented) stepval = (n - 1) * bs;
3452   else stepval = (m - 1) * bs;
3453 
3454   for (i = 0; i < m; i++) {
3455     if (im[i] < 0) continue;
3456     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
3457     if (im[i] >= rstart && im[i] < rend) {
3458       row = im[i] - rstart;
3459       for (j = 0; j < n; j++) {
3460         /* If NumCol = 1 then a copy is not required */
3461         if ((roworiented) && (n == 1)) {
3462           barray = (MatScalar *)v + i * bs2;
3463         } else if ((!roworiented) && (m == 1)) {
3464           barray = (MatScalar *)v + j * bs2;
3465         } else { /* Here a copy is required */
3466           if (roworiented) {
3467             value = v + i * (stepval + bs) * bs + j * bs;
3468           } else {
3469             value = v + j * (stepval + bs) * bs + i * bs;
3470           }
3471           for (ii = 0; ii < bs; ii++, value += stepval) {
3472             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
3473           }
3474           barray -= bs2;
3475         }
3476 
3477         if (in[j] >= cstart && in[j] < cend) {
3478           col = in[j] - cstart;
3479           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
3480         } else if (in[j] < 0) {
3481           continue;
3482         } else {
3483           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
3484           if (mat->was_assembled) {
3485             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
3486 
3487 #if defined(PETSC_USE_DEBUG)
3488   #if defined(PETSC_USE_CTABLE)
3489             {
3490               PetscInt data;
3491               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
3492               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3493             }
3494   #else
3495             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
3496   #endif
3497 #endif
3498 #if defined(PETSC_USE_CTABLE)
3499             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
3500             col = (col - 1) / bs;
3501 #else
3502             col = (baij->colmap[in[j]] - 1) / bs;
3503 #endif
3504             if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
3505               PetscCall(MatDisAssemble_MPIBAIJ(mat));
3506               col = in[j];
3507             }
3508           } else col = in[j];
3509           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3510         }
3511       }
3512     } else {
3513       if (!baij->donotstash) {
3514         if (roworiented) {
3515           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3516         } else {
3517           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3518         }
3519       }
3520     }
3521   }
3522 
3523   /* task normally handled by MatSetValuesBlocked() */
3524   PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3525   PetscFunctionReturn(PETSC_SUCCESS);
3526 }
3527 
3528 /*@
3529   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows.
3530 
3531   Collective
3532 
3533   Input Parameters:
3534 + comm - MPI communicator
3535 . bs   - the block size, only a block size of 1 is supported
3536 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
3537 . n    - This value should be the same as the local size used in creating the
3538          x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
3539          calculated if `N` is given) For square matrices `n` is almost always `m`.
3540 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
3541 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
3542 . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3543 . j    - column indices
3544 - a    - matrix values
3545 
3546   Output Parameter:
3547 . mat - the matrix
3548 
3549   Level: intermediate
3550 
3551   Notes:
3552   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3553   thus you CANNOT change the matrix entries by changing the values of a[] after you have
3554   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3555 
3556   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3557   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3558   block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3559   with column-major ordering within blocks.
3560 
3561   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3562 
3563 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3564           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3565 @*/
3566 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3567 {
3568   PetscFunctionBegin;
3569   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3570   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3571   PetscCall(MatCreate(comm, mat));
3572   PetscCall(MatSetSizes(*mat, m, n, M, N));
3573   PetscCall(MatSetType(*mat, MATMPIBAIJ));
3574   PetscCall(MatSetBlockSize(*mat, bs));
3575   PetscCall(MatSetUp(*mat));
3576   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3577   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3578   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3579   PetscFunctionReturn(PETSC_SUCCESS);
3580 }
3581 
3582 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3583 {
3584   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3585   PetscInt    *indx;
3586   PetscScalar *values;
3587 
3588   PetscFunctionBegin;
3589   PetscCall(MatGetSize(inmat, &m, &N));
3590   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3591     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3592     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3593     PetscInt    *bindx, rmax = a->rmax, j;
3594     PetscMPIInt  rank, size;
3595 
3596     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3597     mbs = m / bs;
3598     Nbs = N / cbs;
3599     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3600     nbs = n / cbs;
3601 
3602     PetscCall(PetscMalloc1(rmax, &bindx));
3603     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3604 
3605     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3606     PetscCallMPI(MPI_Comm_rank(comm, &size));
3607     if (rank == size - 1) {
3608       /* Check sum(nbs) = Nbs */
3609       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3610     }
3611 
3612     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3613     for (i = 0; i < mbs; i++) {
3614       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3615       nnz = nnz / bs;
3616       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3617       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3618       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3619     }
3620     PetscCall(PetscFree(bindx));
3621 
3622     PetscCall(MatCreate(comm, outmat));
3623     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3624     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3625     PetscCall(MatSetType(*outmat, MATBAIJ));
3626     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3627     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3628     MatPreallocateEnd(dnz, onz);
3629     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3630   }
3631 
3632   /* numeric phase */
3633   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3634   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3635 
3636   for (i = 0; i < m; i++) {
3637     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3638     Ii = i + rstart;
3639     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
3640     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3641   }
3642   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
3643   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3644   PetscFunctionReturn(PETSC_SUCCESS);
3645 }
3646